author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 13:31:29 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 13:31:29 -0800
commit | 0f1d6dfe03ca4e36132221b918499c6f0b0f048d (patch)
tree | 0de8e9330610190a23e173ca7d7f3fb74a517aa2
parent | d05c5f7ba164aed3db02fb188c26d0dd94f5455b (diff)
parent | 04b46fbdea5e31ffd745a34fa61269a69ba9f47a (diff)
download | op-kernel-dev-0f1d6dfe03ca4e36132221b918499c6f0b0f048d.zip
download | op-kernel-dev-0f1d6dfe03ca4e36132221b918499c6f0b0f048d.tar.gz
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"Here is the crypto update for 4.10:
API:
- add skcipher walk interface
- add asynchronous compression (acomp) interface
- fix algif_aead AIO handling of zero buffer
Algorithms:
- fix unaligned access in poly1305
- fix DRBG output to large buffers
Drivers:
- add support for iMX6UL to caam
- fix givenc descriptors (used by IPsec) in caam
- accelerated SHA256/SHA512 for ARM64 from OpenSSL
- add SSE CRCT10DIF and CRC32 to ARM/ARM64
- add AEAD support to Chelsio chcr
- add Armada 8K support to omap-rng"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (148 commits)
crypto: testmgr - fix overlap in chunked tests again
crypto: arm/crc32 - accelerated support based on x86 SSE implementation
crypto: arm64/crc32 - accelerated support based on x86 SSE implementation
crypto: arm/crct10dif - port x86 SSE implementation to ARM
crypto: arm64/crct10dif - port x86 SSE implementation to arm64
crypto: testmgr - add/enhance test cases for CRC-T10DIF
crypto: testmgr - avoid overlap in chunked tests
crypto: chcr - checking for IS_ERR() instead of NULL
crypto: caam - check caam_emi_slow instead of re-lookup platform
crypto: algif_aead - fix AIO handling of zero buffer
crypto: aes-ce - Make aes_simd_algs static
crypto: algif_skcipher - set error code when kcalloc fails
crypto: caam - make aamalg_desc a proper module
crypto: caam - pass key buffers with typesafe pointers
crypto: arm64/aes-ce-ccm - Fix AEAD decryption length
MAINTAINERS: add crypto headers to crypto entry
crypt: doc - remove misleading mention of async API
crypto: doc - fix header file name
crypto: api - fix comment typo
crypto: skcipher - Add separate walker for AEAD decryption
..
151 files changed, 15711 insertions, 4462 deletions
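
Much of the ARM glue code in the diff below is converted from the old blkcipher walk to the new skcipher walk interface. The converted ecb_encrypt() from arch/arm/crypto/aes-ce-glue.c, condensed here as a sketch of the resulting pattern, shows the shape these conversions take (ce_aes_ecb_encrypt() and num_rounds() are driver-local helpers, and the NEON begin/end calls are specific to that driver):

#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/internal/skcipher.h>

static int ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int blocks;
	int err;

	/* map the request into the walk; "true" requests the atomic walk */
	err = skcipher_walk_virt(&walk, req, true);

	kernel_neon_begin();
	while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) {
		/* the walk exposes contiguous virtual src/dst chunks */
		ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
		/* report how many bytes of this chunk remain unprocessed */
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	kernel_neon_end();
	return err;
}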
diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt index beda682..45d943f 100644 --- a/Documentation/crypto/api-intro.txt +++ b/Documentation/crypto/api-intro.txt @@ -44,12 +44,9 @@ one block while the former can operate on an arbitrary amount of data, subject to block size requirements (i.e., non-stream ciphers can only process multiples of blocks). -Support for hardware crypto devices via an asynchronous interface is -under development. - Here's an example of how to use the API: - #include <crypto/ahash.h> + #include <crypto/hash.h> #include <linux/err.h> #include <linux/scatterlist.h> diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt index adeca34..10a425f 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt @@ -123,6 +123,9 @@ PROPERTIES EXAMPLE + +iMX6QDL/SX requires four clocks + crypto@300000 { compatible = "fsl,sec-v4.0"; fsl,sec-era = <2>; @@ -139,6 +142,23 @@ EXAMPLE clock-names = "mem", "aclk", "ipg", "emi_slow"; }; + +iMX6UL does only require three clocks + + crypto: caam@2140000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x2140000 0x3c000>; + ranges = <0 0x2140000 0x3c000>; + interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>; + + clocks = <&clks IMX6UL_CLK_CAAM_MEM>, + <&clks IMX6UL_CLK_CAAM_ACLK>, + <&clks IMX6UL_CLK_CAAM_IPG>; + clock-names = "mem", "aclk", "ipg"; + }; + ===================================================================== Job Ring (JR) Node diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt index 6a62acd..4714772 100644 --- a/Documentation/devicetree/bindings/rng/omap_rng.txt +++ b/Documentation/devicetree/bindings/rng/omap_rng.txt @@ -1,4 +1,4 @@ -OMAP SoC HWRNG Module +OMAP SoC and Inside-Secure HWRNG Module Required properties: @@ -6,11 +6,13 @@ Required properties: RNG versions: - "ti,omap2-rng" for OMAP2. - "ti,omap4-rng" for OMAP4, OMAP5 and AM33XX. + - "inside-secure,safexcel-eip76" for SoCs with EIP76 IP block Note that these two versions are incompatible. - ti,hwmods: Name of the hwmod associated with the RNG module - reg : Offset and length of the register set for the module - interrupts : the interrupt number for the RNG module. - Only used for "ti,omap4-rng". + Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76" +- clocks: the trng clock source Example: /* AM335x */ @@ -20,3 +22,11 @@ rng: rng@48310000 { reg = <0x48310000 0x2000>; interrupts = <111>; }; + +/* SafeXcel IP-76 */ +trng: rng@f2760000 { + compatible = "inside-secure,safexcel-eip76"; + reg = <0xf2760000 0x7d>; + interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cpm_syscon0 1 25>; +}; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 9837175..078834a 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -137,6 +137,7 @@ infineon Infineon Technologies inforce Inforce Computing ingenic Ingenic Semiconductor innolux Innolux Corporation +inside-secure INSIDE Secure intel Intel Corporation intercontrol Inter Control Group invensense InvenSense Inc. 
diff --git a/MAINTAINERS b/MAINTAINERS index 60a01bd..59c9895 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3470,6 +3470,7 @@ F: arch/*/crypto/ F: crypto/ F: drivers/crypto/ F: include/crypto/ +F: include/linux/crypto* CRYPTOGRAPHIC RANDOM NUMBER GENERATOR M: Neil Horman <nhorman@tuxdriver.com> @@ -5086,6 +5087,14 @@ F: include/linux/fb.h F: include/uapi/video/ F: include/uapi/linux/fb.h +FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER +M: Horia Geantă <horia.geanta@nxp.com> +M: Dan Douglass <dan.douglass@nxp.com> +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/caam/ +F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt + FREESCALE DIU FRAMEBUFFER DRIVER M: Timur Tabi <timur@tabi.org> L: linux-fbdev@vger.kernel.org diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 27ed1b1..13f1b4c 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -88,9 +88,9 @@ config CRYPTO_AES_ARM config CRYPTO_AES_ARM_BS tristate "Bit sliced AES using NEON instructions" depends on KERNEL_MODE_NEON - select CRYPTO_ALGAPI select CRYPTO_AES_ARM - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER + select CRYPTO_SIMD help Use a faster and more secure NEON based implementation of AES in CBC, CTR and XTS modes @@ -104,8 +104,8 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON - select CRYPTO_ALGAPI - select CRYPTO_ABLK_HELPER + select CRYPTO_BLKCIPHER + select CRYPTO_SIMD help Use an implementation of AES in CBC, CTR and XTS modes that uses ARMv8 Crypto Extensions @@ -120,4 +120,14 @@ config CRYPTO_GHASH_ARM_CE that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64) that is part of the ARMv8 Crypto Extensions +config CRYPTO_CRCT10DIF_ARM_CE + tristate "CRCT10DIF digest algorithm using PMULL instructions" + depends on KERNEL_MODE_NEON && CRC_T10DIF + select CRYPTO_HASH + +config CRYPTO_CRC32_ARM_CE + tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" + depends on KERNEL_MODE_NEON && CRC32 + select CRYPTO_HASH + endif diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index fc51507..b578a18 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -13,6 +13,8 @@ ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o +ce-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o ifneq ($(ce-obj-y)$(ce-obj-m),) ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y) @@ -36,6 +38,8 @@ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o +crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o +crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index aef022a..8857531 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -12,8 +12,8 @@ #include <asm/neon.h> #include <asm/hwcap.h> #include <crypto/aes.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> #include <linux/module.h> #include <crypto/xts.h> @@ -88,8 
+88,13 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8); rko[0] = rko[0] ^ rki[0] ^ rcon[i]; +#else + rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8); + rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24); +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; @@ -128,17 +133,17 @@ static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, return 0; } -static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; ret = ce_aes_expandkey(ctx, in_key, key_len); if (!ret) return 0; - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } @@ -147,13 +152,13 @@ struct crypto_aes_xts_ctx { struct crypto_aes_ctx __aligned(8) key2; }; -static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ret = xts_check_key(tfm, in_key, key_len); + ret = xts_verify_key(tfm, in_key, key_len); if (ret) return ret; @@ -164,130 +169,113 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, if (!ret) return 0; - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int blocks; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, num_rounds(ctx), blocks); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int blocks; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, num_rounds(ctx), 
blocks); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int blocks; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, num_rounds(ctx), blocks, walk.iv); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int blocks; int err; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, num_rounds(ctx), blocks, walk.iv); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; int err, blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) { ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, num_rounds(ctx), blocks, walk.iv); - nbytes -= blocks * AES_BLOCK_SIZE; - if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) - break; - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes % AES_BLOCK_SIZE) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + if (walk.nbytes) { u8 __aligned(8) tail[AES_BLOCK_SIZE]; + unsigned int nbytes = walk.nbytes; + u8 *tdst = walk.dst.virt.addr; + u8 *tsrc = 
walk.src.virt.addr; /* * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need @@ -298,231 +286,172 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, num_rounds(ctx), blocks, walk.iv); memcpy(tdst, tail, nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); return err; } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = num_rounds(&ctx->key1); - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, walk.iv, (u8 *)ctx->key2.key_enc, first); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = num_rounds(&ctx->key1); - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, walk.iv, (u8 *)ctx->key2.key_enc, first); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static struct crypto_alg aes_algs[] = { { - .cra_name = "__ecb-aes-ce", - .cra_driver_name = "__driver-ecb-aes-ce", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = ce_aes_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, +static struct skcipher_alg aes_algs[] = { { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + 
.setkey = ce_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, }, { - .cra_name = "__cbc-aes-ce", - .cra_driver_name = "__driver-cbc-aes-ce", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ce_aes_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, }, { - .cra_name = "__ctr-aes-ce", - .cra_driver_name = "__driver-ctr-aes-ce", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ce_aes_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = ce_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, }, { - .cra_name = "__xts-aes-ce", - .cra_driver_name = "__driver-xts-aes-ce", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_set_key, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-ce", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-ce", - .cra_priority = 300, - .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-ce", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, } }; +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); +} + static int __init aes_init(void) { + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + if (!(elf_hwcap2 & HWCAP2_AES)) return -ENODEV; - return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); -} -static void __exit aes_exit(void) -{ - crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + + return 0; + +unregister_simds: + aes_exit(); + return err; } module_init(aes_init); diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 0511a6c..d8e06de 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -10,8 +10,9 @@ #include <asm/neon.h> #include <crypto/aes.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> +#include <crypto/cbc.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> #include <linux/module.h> #include <crypto/xts.h> @@ -55,14 +56,14 @@ struct aesbs_xts_ctx { struct AES_KEY twkey; }; -static int 
aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int aesbs_cbc_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); int bits = key_len * 8; if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } ctx->dec.rk = ctx->enc; @@ -71,33 +72,33 @@ static int aesbs_cbc_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int aesbs_ctr_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int aesbs_ctr_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct aesbs_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); int bits = key_len * 8; if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } ctx->enc.converted = 0; return 0; } -static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int aesbs_xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int bits = key_len * 4; int err; - err = xts_check_key(tfm, in_key, key_len); + err = xts_verify_key(tfm, in_key, key_len); if (err) return err; if (private_AES_set_encrypt_key(in_key, bits, &ctx->enc.rk)) { - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } ctx->dec.rk = ctx->enc.rk; @@ -107,88 +108,52 @@ static int aesbs_xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, return 0; } -static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static inline void aesbs_encrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) { - struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; - int err; + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + AES_encrypt(src, dst, &ctx->enc); +} - while (walk.nbytes) { - u32 blocks = walk.nbytes / AES_BLOCK_SIZE; - u8 *src = walk.src.virt.addr; +static int aesbs_cbc_encrypt(struct skcipher_request *req) +{ + return crypto_cbc_encrypt_walk(req, aesbs_encrypt_one); +} - if (walk.dst.virt.addr == walk.src.virt.addr) { - u8 *iv = walk.iv; - - do { - crypto_xor(src, iv, AES_BLOCK_SIZE); - AES_encrypt(src, src, &ctx->enc); - iv = src; - src += AES_BLOCK_SIZE; - } while (--blocks); - memcpy(walk.iv, iv, AES_BLOCK_SIZE); - } else { - u8 *dst = walk.dst.virt.addr; - - do { - crypto_xor(walk.iv, src, AES_BLOCK_SIZE); - AES_encrypt(walk.iv, dst, &ctx->enc); - memcpy(walk.iv, dst, AES_BLOCK_SIZE); - src += AES_BLOCK_SIZE; - dst += AES_BLOCK_SIZE; - } while (--blocks); - } - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); - } - return err; +static inline void aesbs_decrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) +{ + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + + AES_decrypt(src, dst, &ctx->dec.rk); } -static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) 
+static int aesbs_cbc_decrypt(struct skcipher_request *req) { - struct aesbs_cbc_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); - - while ((walk.nbytes / AES_BLOCK_SIZE) >= 8) { - kernel_neon_begin(); - bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - walk.nbytes, &ctx->dec, walk.iv); - kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); - } - while (walk.nbytes) { - u32 blocks = walk.nbytes / AES_BLOCK_SIZE; + for (err = skcipher_walk_virt(&walk, req, false); + (nbytes = walk.nbytes); err = skcipher_walk_done(&walk, nbytes)) { + u32 blocks = nbytes / AES_BLOCK_SIZE; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; - u8 bk[2][AES_BLOCK_SIZE]; u8 *iv = walk.iv; - do { - if (walk.dst.virt.addr == walk.src.virt.addr) - memcpy(bk[blocks & 1], src, AES_BLOCK_SIZE); - - AES_decrypt(src, dst, &ctx->dec.rk); - crypto_xor(dst, iv, AES_BLOCK_SIZE); - - if (walk.dst.virt.addr == walk.src.virt.addr) - iv = bk[blocks & 1]; - else - iv = src; + if (blocks >= 8) { + kernel_neon_begin(); + bsaes_cbc_encrypt(src, dst, nbytes, &ctx->dec, iv); + kernel_neon_end(); + nbytes %= AES_BLOCK_SIZE; + continue; + } - dst += AES_BLOCK_SIZE; - src += AES_BLOCK_SIZE; - } while (--blocks); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + nbytes = crypto_cbc_decrypt_blocks(&walk, tfm, + aesbs_decrypt_one); } return err; } @@ -206,17 +171,15 @@ static void inc_be128_ctr(__be32 ctr[], u32 addend) } } -static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int aesbs_ctr_encrypt(struct skcipher_request *req) { - struct aesbs_ctr_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; u32 blocks; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((blocks = walk.nbytes / AES_BLOCK_SIZE)) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; @@ -235,11 +198,7 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, kernel_neon_end(); inc_be128_ctr(ctr, blocks); - nbytes -= blocks * AES_BLOCK_SIZE; - if (nbytes && nbytes == tail && nbytes <= AES_BLOCK_SIZE) - break; - - err = blkcipher_walk_done(desc, &walk, tail); + err = skcipher_walk_done(&walk, tail); } if (walk.nbytes) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; @@ -248,23 +207,21 @@ static int aesbs_ctr_encrypt(struct blkcipher_desc *desc, AES_encrypt(walk.iv, ks, &ctx->enc.rk); if (tdst != tsrc) - memcpy(tdst, tsrc, nbytes); - crypto_xor(tdst, ks, nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + memcpy(tdst, tsrc, walk.nbytes); + crypto_xor(tdst, ks, walk.nbytes); + err = skcipher_walk_done(&walk, 0); } return err; } -static int aesbs_xts_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int aesbs_xts_encrypt(struct skcipher_request *req) { - struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct 
blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); /* generate the initial tweak */ AES_encrypt(walk.iv, walk.iv, &ctx->twkey); @@ -274,21 +231,19 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc, bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->enc, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } -static int aesbs_xts_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int aesbs_xts_decrypt(struct skcipher_request *req) { - struct aesbs_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 8 * AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); /* generate the initial tweak */ AES_encrypt(walk.iv, walk.iv, &ctx->twkey); @@ -298,141 +253,110 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc, bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } -static struct crypto_alg aesbs_algs[] = { { - .cra_name = "__cbc-aes-neonbs", - .cra_driver_name = "__driver-cbc-aes-neonbs", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_set_key, - .encrypt = aesbs_cbc_encrypt, - .decrypt = aesbs_cbc_decrypt, +static struct skcipher_alg aesbs_algs[] = { { + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_cbc_set_key, + .encrypt = aesbs_cbc_encrypt, + .decrypt = aesbs_cbc_decrypt, }, { - .cra_name = "__ctr-aes-neonbs", - .cra_driver_name = "__driver-ctr-aes-neonbs", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_ctr_set_key, - .encrypt = aesbs_ctr_encrypt, - .decrypt = aesbs_ctr_encrypt, + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-neonbs", + .cra_priority = 300, 
+ .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesbs_ctr_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = aesbs_ctr_set_key, + .encrypt = aesbs_ctr_encrypt, + .decrypt = aesbs_ctr_encrypt, }, { - .cra_name = "__xts-aes-neonbs", - .cra_driver_name = "__driver-xts-aes-neonbs", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesbs_xts_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_xts_set_key, - .encrypt = aesbs_xts_encrypt, - .decrypt = aesbs_xts_decrypt, + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-neonbs", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesbs_xts_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-neonbs", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesbs_xts_set_key, + .encrypt = aesbs_xts_encrypt, + .decrypt = aesbs_xts_decrypt, } }; +struct simd_skcipher_alg *aesbs_simd_algs[ARRAY_SIZE(aesbs_algs)]; + +static void aesbs_mod_exit(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aesbs_simd_algs) && aesbs_simd_algs[i]; i++) + simd_skcipher_free(aesbs_simd_algs[i]); + + crypto_unregister_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); +} + static int __init aesbs_mod_init(void) { + struct simd_skcipher_alg 
*simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + if (!cpu_has_neon()) return -ENODEV; - return crypto_register_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); -} + err = crypto_register_skciphers(aesbs_algs, ARRAY_SIZE(aesbs_algs)); + if (err) + return err; -static void __exit aesbs_mod_exit(void) -{ - crypto_unregister_algs(aesbs_algs, ARRAY_SIZE(aesbs_algs)); + for (i = 0; i < ARRAY_SIZE(aesbs_algs); i++) { + algname = aesbs_algs[i].base.cra_name + 2; + drvname = aesbs_algs[i].base.cra_driver_name + 2; + basename = aesbs_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aesbs_simd_algs[i] = simd; + } + + return 0; + +unregister_simds: + aesbs_mod_exit(); + return err; } module_init(aesbs_mod_init); diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S new file mode 100644 index 0000000..e63d400 --- /dev/null +++ b/arch/arm/crypto/crc32-ce-core.S @@ -0,0 +1,306 @@ +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. 
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> + * Alexander Boyko <Alexander_Boyko@xyratex.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 6 + .arch armv8-a + .arch_extension crc + .fpu crypto-neon-fp-armv8 + +.Lcrc32_constants: + /* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ + .quad 0x0000000154442bd4 + .quad 0x00000001c6e41596 + + /* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ + .quad 0x00000001751997d0 + .quad 0x00000000ccaa009e + + /* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ + .quad 0x0000000163cd6124 + .quad 0x00000000FFFFFFFF + + /* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` + * = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ + .quad 0x00000001DB710641 + .quad 0x00000001F7011641 + +.Lcrc32c_constants: + .quad 0x00000000740eef02 + .quad 0x000000009e4addf8 + .quad 0x00000000f20c0dfe + .quad 0x000000014cd00bd6 + .quad 0x00000000dd45aab8 + .quad 0x00000000FFFFFFFF + .quad 0x0000000105ec76f0 + .quad 0x00000000dea713f1 + + dCONSTANTl .req d0 + dCONSTANTh .req d1 + qCONSTANT .req q0 + + BUF .req r0 + LEN .req r1 + CRC .req r2 + + qzr .req q9 + + /** + * Calculate crc32 + * BUF - buffer + * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pmull_le(unsigned char const *buffer, + * size_t len, uint crc32) + */ +ENTRY(crc32_pmull_le) + adr r3, .Lcrc32_constants + b 0f + +ENTRY(crc32c_pmull_le) + adr r3, .Lcrc32c_constants + +0: bic LEN, LEN, #15 + vld1.8 {q1-q2}, [BUF, :128]! + vld1.8 {q3-q4}, [BUF, :128]! + vmov.i8 qzr, #0 + vmov.i8 qCONSTANT, #0 + vmov dCONSTANTl[0], CRC + veor.8 d2, d2, dCONSTANTl + sub LEN, LEN, #0x40 + cmp LEN, #0x40 + blt less_64 + + vld1.64 {qCONSTANT}, [r3] + +loop_64: /* 64 bytes Full cache line folding */ + sub LEN, LEN, #0x40 + + vmull.p64 q5, d3, dCONSTANTh + vmull.p64 q6, d5, dCONSTANTh + vmull.p64 q7, d7, dCONSTANTh + vmull.p64 q8, d9, dCONSTANTh + + vmull.p64 q1, d2, dCONSTANTl + vmull.p64 q2, d4, dCONSTANTl + vmull.p64 q3, d6, dCONSTANTl + vmull.p64 q4, d8, dCONSTANTl + + veor.8 q1, q1, q5 + vld1.8 {q5}, [BUF, :128]! + veor.8 q2, q2, q6 + vld1.8 {q6}, [BUF, :128]! + veor.8 q3, q3, q7 + vld1.8 {q7}, [BUF, :128]! + veor.8 q4, q4, q8 + vld1.8 {q8}, [BUF, :128]! 
+ + veor.8 q1, q1, q5 + veor.8 q2, q2, q6 + veor.8 q3, q3, q7 + veor.8 q4, q4, q8 + + cmp LEN, #0x40 + bge loop_64 + +less_64: /* Folding cache line into 128bit */ + vldr dCONSTANTl, [r3, #16] + vldr dCONSTANTh, [r3, #24] + + vmull.p64 q5, d3, dCONSTANTh + vmull.p64 q1, d2, dCONSTANTl + veor.8 q1, q1, q5 + veor.8 q1, q1, q2 + + vmull.p64 q5, d3, dCONSTANTh + vmull.p64 q1, d2, dCONSTANTl + veor.8 q1, q1, q5 + veor.8 q1, q1, q3 + + vmull.p64 q5, d3, dCONSTANTh + vmull.p64 q1, d2, dCONSTANTl + veor.8 q1, q1, q5 + veor.8 q1, q1, q4 + + teq LEN, #0 + beq fold_64 + +loop_16: /* Folding rest buffer into 128bit */ + subs LEN, LEN, #0x10 + + vld1.8 {q2}, [BUF, :128]! + vmull.p64 q5, d3, dCONSTANTh + vmull.p64 q1, d2, dCONSTANTl + veor.8 q1, q1, q5 + veor.8 q1, q1, q2 + + bne loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + vmull.p64 q2, d2, dCONSTANTh + vext.8 q1, q1, qzr, #8 + veor.8 q1, q1, q2 + + /* final 32-bit fold */ + vldr dCONSTANTl, [r3, #32] + vldr d6, [r3, #40] + vmov.i8 d7, #0 + + vext.8 q2, q1, qzr, #4 + vand.8 d2, d2, d6 + vmull.p64 q1, d2, dCONSTANTl + veor.8 q1, q1, q2 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ + vldr dCONSTANTl, [r3, #48] + vldr dCONSTANTh, [r3, #56] + + vand.8 q2, q1, q3 + vext.8 q2, qzr, q2, #8 + vmull.p64 q2, d5, dCONSTANTh + vand.8 q2, q2, q3 + vmull.p64 q2, d4, dCONSTANTl + veor.8 q1, q1, q2 + vmov r0, s5 + + bx lr +ENDPROC(crc32_pmull_le) +ENDPROC(crc32c_pmull_le) + + .macro __crc32, c + subs ip, r2, #8 + bmi .Ltail\c + + tst r1, #3 + bne .Lunaligned\c + + teq ip, #0 +.Laligned8\c: + ldrd r2, r3, [r1], #8 +ARM_BE8(rev r2, r2 ) +ARM_BE8(rev r3, r3 ) + crc32\c\()w r0, r0, r2 + crc32\c\()w r0, r0, r3 + bxeq lr + subs ip, ip, #8 + bpl .Laligned8\c + +.Ltail\c: + tst ip, #4 + beq 2f + ldr r3, [r1], #4 +ARM_BE8(rev r3, r3 ) + crc32\c\()w r0, r0, r3 + +2: tst ip, #2 + beq 1f + ldrh r3, [r1], #2 +ARM_BE8(rev16 r3, r3 ) + crc32\c\()h r0, r0, r3 + +1: tst ip, #1 + bxeq lr + ldrb r3, [r1] + crc32\c\()b r0, r0, r3 + bx lr + +.Lunaligned\c: + tst r1, #1 + beq 2f + ldrb r3, [r1], #1 + subs r2, r2, #1 + crc32\c\()b r0, r0, r3 + + tst r1, #2 + beq 0f +2: ldrh r3, [r1], #2 + subs r2, r2, #2 +ARM_BE8(rev16 r3, r3 ) + crc32\c\()h r0, r0, r3 + +0: subs ip, r2, #8 + bpl .Laligned8\c + b .Ltail\c + .endm + + .align 5 +ENTRY(crc32_armv8_le) + __crc32 +ENDPROC(crc32_armv8_le) + + .align 5 +ENTRY(crc32c_armv8_le) + __crc32 c +ENDPROC(crc32c_armv8_le) diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c new file mode 100644 index 0000000..e1566be --- /dev/null +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -0,0 +1,242 @@ +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/hash.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/unaligned.h> + +#define PMULL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pmull_le_16 */ +#define SCALE_F 16L /* size of NEON register */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len); +static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len); + +static int crc32_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + return 0; +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + return 0; +} + +static int crc32_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crc = shash_desc_ctx(desc); + + *crc = *mctx; + return 0; +} + +static int crc32_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + + *crc = crc32_armv8_le(*crc, data, length); + return 0; +} + +static int crc32c_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + + *crc = crc32c_armv8_le(*crc, data, length); + return 0; +} + +static int crc32_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(*crc, out); + return 0; +} + +static int crc32c_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(~*crc, out); + return 0; +} + +static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if (may_use_simd()) { + if ((u32)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); + + *crc = fallback_crc32(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin(); + *crc = crc32_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + } + + if (length > 0) + *crc = fallback_crc32(*crc, data, length); + + return 0; +} + +static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if (may_use_simd()) { + if ((u32)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); + + *crc = fallback_crc32c(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin(); + *crc = crc32c_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + } + + if (length > 0) + *crc = fallback_crc32c(*crc, data, length); + + return 0; +} + +static struct shash_alg crc32_pmull_algs[] = { { + .setkey = crc32_setkey, + 
.init = crc32_init, + .update = crc32_update, + .final = crc32_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32_cra_init, + .base.cra_name = "crc32", + .base.cra_driver_name = "crc32-arm-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_module = THIS_MODULE, +}, { + .setkey = crc32_setkey, + .init = crc32_init, + .update = crc32c_update, + .final = crc32c_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32c_cra_init, + .base.cra_name = "crc32c", + .base.cra_driver_name = "crc32c-arm-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_module = THIS_MODULE, +} }; + +static int __init crc32_pmull_mod_init(void) +{ + if (elf_hwcap2 & HWCAP2_PMULL) { + crc32_pmull_algs[0].update = crc32_pmull_update; + crc32_pmull_algs[1].update = crc32c_pmull_update; + + if (elf_hwcap2 & HWCAP2_CRC32) { + fallback_crc32 = crc32_armv8_le; + fallback_crc32c = crc32c_armv8_le; + } else { + fallback_crc32 = crc32_le; + fallback_crc32c = __crc32c_le; + } + } else if (!(elf_hwcap2 & HWCAP2_CRC32)) { + return -ENODEV; + } + + return crypto_register_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +static void __exit crc32_pmull_mod_exit(void) +{ + crypto_unregister_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +module_init(crc32_pmull_mod_init); +module_exit(crc32_pmull_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("crc32"); +MODULE_ALIAS_CRYPTO("crc32c"); diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S new file mode 100644 index 0000000..ce45ba0 --- /dev/null +++ b/arch/arm/crypto/crct10dif-ce-core.S @@ -0,0 +1,427 @@ +// +// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions +// +// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// + +// +// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +// +// Copyright (c) 2013, Intel Corporation +// +// Authors: +// Erdinc Ozturk <erdinc.ozturk@intel.com> +// Vinodh Gopal <vinodh.gopal@intel.com> +// James Guilford <james.guilford@intel.com> +// Tim Chen <tim.c.chen@linux.intel.com> +// +// This software is available to you under a choice of one of two +// licenses. You may choose to be licensed under the terms of the GNU +// General Public License (GPL) Version 2, available from the file +// COPYING in the main directory of this source tree, or the +// OpenIB.org BSD license below: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. 
+// +// * Neither the name of the Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// +// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Function API: +// UINT16 crc_t10dif_pcl( +// UINT16 init_crc, //initial CRC value, 16 bits +// const unsigned char *buf, //buffer pointer to calculate CRC on +// UINT64 len //buffer length in bytes (64-bit data) +// ); +// +// Reference paper titled "Fast CRC Computation for Generic +// Polynomials Using PCLMULQDQ Instruction" +// URL: http://www.intel.com/content/dam/www/public/us/en/documents +// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +// +// + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define CPU_LE(code...) +#else +#define CPU_LE(code...) code +#endif + + .text + .fpu crypto-neon-fp-armv8 + + arg1_low32 .req r0 + arg2 .req r1 + arg3 .req r2 + + qzr .req q13 + + q0l .req d0 + q0h .req d1 + q1l .req d2 + q1h .req d3 + q2l .req d4 + q2h .req d5 + q3l .req d6 + q3h .req d7 + q4l .req d8 + q4h .req d9 + q5l .req d10 + q5h .req d11 + q6l .req d12 + q6h .req d13 + q7l .req d14 + q7h .req d15 + +ENTRY(crc_t10dif_pmull) + vmov.i8 qzr, #0 // init zero register + + // adjust the 16-bit initial_crc value, scale it to 32 bits + lsl arg1_low32, arg1_low32, #16 + + // check if smaller than 256 + cmp arg3, #256 + + // for sizes less than 128, we can't fold 64B at a time... + blt _less_than_128 + + // load the initial crc value + // crc value does not need to be byte-reflected, but it needs + // to be moved to the high part of the register. + // because data will be byte-reflected and will align with + // initial crc at correct place. + vmov s0, arg1_low32 // initial crc + vext.8 q10, qzr, q0, #4 + + // receive the initial 64B data, xor the initial crc value + vld1.64 {q0-q1}, [arg2, :128]! + vld1.64 {q2-q3}, [arg2, :128]! + vld1.64 {q4-q5}, [arg2, :128]! + vld1.64 {q6-q7}, [arg2, :128]! +CPU_LE( vrev64.8 q0, q0 ) +CPU_LE( vrev64.8 q1, q1 ) +CPU_LE( vrev64.8 q2, q2 ) +CPU_LE( vrev64.8 q3, q3 ) +CPU_LE( vrev64.8 q4, q4 ) +CPU_LE( vrev64.8 q5, q5 ) +CPU_LE( vrev64.8 q6, q6 ) +CPU_LE( vrev64.8 q7, q7 ) + + vswp d0, d1 + vswp d2, d3 + vswp d4, d5 + vswp d6, d7 + vswp d8, d9 + vswp d10, d11 + vswp d12, d13 + vswp d14, d15 + + // XOR the initial_crc value + veor.8 q0, q0, q10 + + adr ip, rk3 + vld1.64 {q10}, [ip, :128] // xmm10 has rk3 and rk4 + + // + // we subtract 256 instead of 128 to save one instruction from the loop + // + sub arg3, arg3, #256 + + // at this section of the code, there is 64*x+y (0<=y<64) bytes of + // buffer. The _fold_64_B_loop will fold 64B at a time + // until we have 64+y Bytes of buffer + + + // fold 64B at a time. 
This section of the code folds 4 vector + // registers in parallel +_fold_64_B_loop: + + .macro fold64, reg1, reg2 + vld1.64 {q11-q12}, [arg2, :128]! + + vmull.p64 q8, \reg1\()h, d21 + vmull.p64 \reg1, \reg1\()l, d20 + vmull.p64 q9, \reg2\()h, d21 + vmull.p64 \reg2, \reg2\()l, d20 + +CPU_LE( vrev64.8 q11, q11 ) +CPU_LE( vrev64.8 q12, q12 ) + vswp d22, d23 + vswp d24, d25 + + veor.8 \reg1, \reg1, q8 + veor.8 \reg2, \reg2, q9 + veor.8 \reg1, \reg1, q11 + veor.8 \reg2, \reg2, q12 + .endm + + fold64 q0, q1 + fold64 q2, q3 + fold64 q4, q5 + fold64 q6, q7 + + subs arg3, arg3, #128 + + // check if there is another 64B in the buffer to be able to fold + bge _fold_64_B_loop + + // at this point, the buffer pointer is pointing at the last y Bytes + // of the buffer the 64B of folded data is in 4 of the vector + // registers: v0, v1, v2, v3 + + // fold the 8 vector registers to 1 vector register with different + // constants + + adr ip, rk9 + vld1.64 {q10}, [ip, :128]! + + .macro fold16, reg, rk + vmull.p64 q8, \reg\()l, d20 + vmull.p64 \reg, \reg\()h, d21 + .ifnb \rk + vld1.64 {q10}, [ip, :128]! + .endif + veor.8 q7, q7, q8 + veor.8 q7, q7, \reg + .endm + + fold16 q0, rk11 + fold16 q1, rk13 + fold16 q2, rk15 + fold16 q3, rk17 + fold16 q4, rk19 + fold16 q5, rk1 + fold16 q6 + + // instead of 64, we add 48 to the loop counter to save 1 instruction + // from the loop instead of a cmp instruction, we use the negative + // flag with the jl instruction + adds arg3, arg3, #(128-16) + blt _final_reduction_for_128 + + // now we have 16+y bytes left to reduce. 16 Bytes is in register v7 + // and the rest is in memory. We can fold 16 bytes at a time if y>=16 + // continue folding 16B at a time + +_16B_reduction_loop: + vmull.p64 q8, d14, d20 + vmull.p64 q7, d15, d21 + veor.8 q7, q7, q8 + + vld1.64 {q0}, [arg2, :128]! +CPU_LE( vrev64.8 q0, q0 ) + vswp d0, d1 + veor.8 q7, q7, q0 + subs arg3, arg3, #16 + + // instead of a cmp instruction, we utilize the flags with the + // jge instruction equivalent of: cmp arg3, 16-16 + // check if there is any more 16B in the buffer to be able to fold + bge _16B_reduction_loop + + // now we have 16+z bytes left to reduce, where 0<= z < 16. + // first, we reduce the data in the xmm7 register + +_final_reduction_for_128: + // check if any more data to fold. If not, compute the CRC of + // the final 128 bits + adds arg3, arg3, #16 + beq _128_done + + // here we are getting data that is less than 16 bytes. + // since we know that there was data before the pointer, we can + // offset the input pointer before the actual point, to receive + // exactly 16 bytes. after that the registers need to be adjusted. 
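Note (illustrative, not part of the patch): the comment above describes how the final sub-16-byte chunk is handled. Rather than zero-padding a short read, _get_last_two_regs backs the data pointer up so that a full 16-byte load ends exactly at the end of the buffer; the bytes that were already folded are then masked off by the tbl_shf_table shuffle. A minimal C model of just the pointer adjustment, with cur and rem as purely illustrative names:

	#include <stdint.h>
	#include <string.h>

	/*
	 * cur points at the rem (0 < rem < 16) unprocessed bytes.
	 * Backing up by 16 - rem bytes is safe here because this path
	 * is only reached after at least one full 16-byte block has
	 * already been consumed from the buffer.
	 */
	static void load_final_block(const uint8_t *cur, size_t rem,
				     uint8_t block[16])
	{
		memcpy(block, cur + rem - 16, 16);
		/*
		 * block[0 .. 16-rem-1] now duplicate already-folded data;
		 * the table-driven shift in the assembly discards them
		 * before the last fold.
		 */
	}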
+_get_last_two_regs: + add arg2, arg2, arg3 + sub arg2, arg2, #16 + vld1.64 {q1}, [arg2] +CPU_LE( vrev64.8 q1, q1 ) + vswp d2, d3 + + // get rid of the extra data that was loaded before + // load the shift constant + adr ip, tbl_shf_table + 16 + sub ip, ip, arg3 + vld1.8 {q0}, [ip] + + // shift v2 to the left by arg3 bytes + vtbl.8 d4, {d14-d15}, d0 + vtbl.8 d5, {d14-d15}, d1 + + // shift v7 to the right by 16-arg3 bytes + vmov.i8 q9, #0x80 + veor.8 q0, q0, q9 + vtbl.8 d18, {d14-d15}, d0 + vtbl.8 d19, {d14-d15}, d1 + + // blend + vshr.s8 q0, q0, #7 // convert to 8-bit mask + vbsl.8 q0, q2, q1 + + // fold 16 Bytes + vmull.p64 q8, d18, d20 + vmull.p64 q7, d19, d21 + veor.8 q7, q7, q8 + veor.8 q7, q7, q0 + +_128_done: + // compute crc of a 128-bit value + vldr d20, rk5 + vldr d21, rk6 // rk5 and rk6 in xmm10 + + // 64b fold + vext.8 q0, qzr, q7, #8 + vmull.p64 q7, d15, d20 + veor.8 q7, q7, q0 + + // 32b fold + vext.8 q0, q7, qzr, #12 + vmov s31, s3 + vmull.p64 q0, d0, d21 + veor.8 q7, q0, q7 + + // barrett reduction +_barrett: + vldr d20, rk7 + vldr d21, rk8 + + vmull.p64 q0, d15, d20 + vext.8 q0, qzr, q0, #12 + vmull.p64 q0, d1, d21 + vext.8 q0, qzr, q0, #12 + veor.8 q7, q7, q0 + vmov r0, s29 + +_cleanup: + // scale the result back to 16 bits + lsr r0, r0, #16 + bx lr + +_less_than_128: + teq arg3, #0 + beq _cleanup + + vmov.i8 q0, #0 + vmov s3, arg1_low32 // get the initial crc value + + vld1.64 {q7}, [arg2, :128]! +CPU_LE( vrev64.8 q7, q7 ) + vswp d14, d15 + veor.8 q7, q7, q0 + + cmp arg3, #16 + beq _128_done // exactly 16 left + blt _less_than_16_left + + // now if there is, load the constants + vldr d20, rk1 + vldr d21, rk2 // rk1 and rk2 in xmm10 + + // check if there is enough buffer to be able to fold 16B at a time + subs arg3, arg3, #32 + addlt arg3, arg3, #16 + blt _get_last_two_regs + b _16B_reduction_loop + +_less_than_16_left: + // shl r9, 4 + adr ip, tbl_shf_table + 16 + sub ip, ip, arg3 + vld1.8 {q0}, [ip] + vmov.i8 q9, #0x80 + veor.8 q0, q0, q9 + vtbl.8 d18, {d14-d15}, d0 + vtbl.8 d15, {d14-d15}, d1 + vmov d14, d18 + b _128_done +ENDPROC(crc_t10dif_pmull) + +// precomputed constants +// these constants are precomputed from the poly: +// 0x8bb70000 (0x8bb7 scaled to 32 bits) + .align 4 +// Q = 0x18BB70000 +// rk1 = 2^(32*3) mod Q << 32 +// rk2 = 2^(32*5) mod Q << 32 +// rk3 = 2^(32*15) mod Q << 32 +// rk4 = 2^(32*17) mod Q << 32 +// rk5 = 2^(32*3) mod Q << 32 +// rk6 = 2^(32*2) mod Q << 32 +// rk7 = floor(2^64/Q) +// rk8 = Q + +rk3: .quad 0x9d9d000000000000 +rk4: .quad 0x7cf5000000000000 +rk5: .quad 0x2d56000000000000 +rk6: .quad 0x1368000000000000 +rk7: .quad 0x00000001f65a57f8 +rk8: .quad 0x000000018bb70000 +rk9: .quad 0xceae000000000000 +rk10: .quad 0xbfd6000000000000 +rk11: .quad 0x1e16000000000000 +rk12: .quad 0x713c000000000000 +rk13: .quad 0xf7f9000000000000 +rk14: .quad 0x80a6000000000000 +rk15: .quad 0x044c000000000000 +rk16: .quad 0xe658000000000000 +rk17: .quad 0xad18000000000000 +rk18: .quad 0xa497000000000000 +rk19: .quad 0x6ee3000000000000 +rk20: .quad 0xe7b5000000000000 +rk1: .quad 0x2d56000000000000 +rk2: .quad 0x06df000000000000 + +tbl_shf_table: +// use these values for shift constants for the tbl/tbx instruction +// different alignments result in values as shown: +// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 +// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 +// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 +// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 +// DDQ 
0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 +// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 +// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 +// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 +// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 +// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 +// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 +// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 +// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 +// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 +// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 + + .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 + .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c new file mode 100644 index 0000000..d428355 --- /dev/null +++ b/arch/arm/crypto/crct10dif-ce-glue.c @@ -0,0 +1,101 @@ +/* + * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/crc-t10dif.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/hash.h> + +#include <asm/neon.h> +#include <asm/simd.h> + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u32 len); + +static int crct10dif_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + return 0; +} + +static int crct10dif_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + unsigned int l; + + if (!may_use_simd()) { + *crc = crc_t10dif_generic(*crc, data, length); + } else { + if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { + l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - + ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); + + *crc = crc_t10dif_generic(*crc, data, l); + + length -= l; + data += l; + } + if (length > 0) { + kernel_neon_begin(); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } + } + return 0; +} + +static int crct10dif_final(struct shash_desc *desc, u8 *out) +{ + u16 *crc = shash_desc_ctx(desc); + + *(u16 *)out = *crc; + return 0; +} + +static struct shash_alg crc_t10dif_alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = crct10dif_init, + .update = crct10dif_update, + .final = crct10dif_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + + .base.cra_name = "crct10dif", + .base.cra_driver_name = "crct10dif-arm-ce", + .base.cra_priority = 200, + .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init crc_t10dif_mod_init(void) +{ + if (!(elf_hwcap2 & HWCAP2_PMULL)) + return -ENODEV; + + return crypto_register_shash(&crc_t10dif_alg); +} + +static void __exit crc_t10dif_mod_exit(void) +{ + crypto_unregister_shash(&crc_t10dif_alg); +} + +module_init(crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("crct10dif"); diff 
--git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index 602e2c2..93ec8fe 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -164,6 +164,14 @@ clocks = <&cpm_syscon0 1 21>; status = "disabled"; }; + + cpm_trng: trng@760000 { + compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76"; + reg = <0x760000 0x7d>; + interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cpm_syscon0 1 25>; + status = "okay"; + }; }; cpm_pcie0: pcie@f2600000 { diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 6bf9e24..ee8db05 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -164,6 +164,14 @@ clocks = <&cps_syscon0 1 21>; status = "disabled"; }; + + cps_trng: trng@760000 { + compatible = "marvell,armada-8k-rng", "inside-secure,safexcel-eip76"; + reg = <0x760000 0x7d>; + interrupts = <GIC_SPI 312 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&cps_syscon0 1 25>; + status = "okay"; + }; }; cps_pcie0: pcie@f4600000 { diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore new file mode 100644 index 0000000..879df87 --- /dev/null +++ b/arch/arm64/crypto/.gitignore @@ -0,0 +1,2 @@ +sha256-core.S +sha512-core.S diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 2cf32e9..450a85d 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -8,6 +8,14 @@ menuconfig ARM64_CRYPTO if ARM64_CRYPTO +config CRYPTO_SHA256_ARM64 + tristate "SHA-224/SHA-256 digest algorithm for arm64" + select CRYPTO_HASH + +config CRYPTO_SHA512_ARM64 + tristate "SHA-384/SHA-512 digest algorithm for arm64" + select CRYPTO_HASH + config CRYPTO_SHA1_ARM64_CE tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" depends on ARM64 && KERNEL_MODE_NEON @@ -23,6 +31,16 @@ config CRYPTO_GHASH_ARM64_CE depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_HASH +config CRYPTO_CRCT10DIF_ARM64_CE + tristate "CRCT10DIF digest algorithm using PMULL instructions" + depends on KERNEL_MODE_NEON && CRC_T10DIF + select CRYPTO_HASH + +config CRYPTO_CRC32_ARM64_CE + tristate "CRC32 and CRC32C digest algorithms using PMULL instructions" + depends on KERNEL_MODE_NEON && CRC32 + select CRYPTO_HASH + config CRYPTO_AES_ARM64_CE tristate "AES core cipher using ARMv8 Crypto Extensions" depends on ARM64 && KERNEL_MODE_NEON @@ -40,17 +58,18 @@ config CRYPTO_AES_ARM64_CE_BLK depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES_ARM64_CE - select CRYPTO_ABLK_HELPER + select CRYPTO_SIMD config CRYPTO_AES_ARM64_NEON_BLK tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" depends on ARM64 && KERNEL_MODE_NEON select CRYPTO_BLKCIPHER select CRYPTO_AES - select CRYPTO_ABLK_HELPER + select CRYPTO_SIMD config CRYPTO_CRC32_ARM64 tristate "CRC32 and CRC32C using optional ARMv8 instructions" depends on ARM64 select CRYPTO_HASH + endif diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index abb79b3..aa8888d 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -17,6 +17,12 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o +obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o +crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o + +obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += 
crc32-ce.o +crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o + obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto @@ -29,6 +35,12 @@ aes-ce-blk-y := aes-glue-ce.o aes-ce.o obj-$(CONFIG_CRYPTO_AES_ARM64_NEON_BLK) += aes-neon-blk.o aes-neon-blk-y := aes-glue-neon.o aes-neon.o +obj-$(CONFIG_CRYPTO_SHA256_ARM64) += sha256-arm64.o +sha256-arm64-y := sha256-glue.o sha256-core.o + +obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o +sha512-arm64-y := sha512-glue.o sha512-core.o + AFLAGS_aes-ce.o := -DINTERLEAVE=4 AFLAGS_aes-neon.o := -DINTERLEAVE=4 @@ -40,3 +52,14 @@ CFLAGS_crc32-arm64.o := -mcpu=generic+crc $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE $(call if_changed_rule,cc_o_c) + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $(<) void $(@) + +$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl + $(call cmd,perlasm) + +$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl + $(call cmd,perlasm) + +.PRECIOUS: $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index a2a7fbc..3363560 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> .text .arch armv8-a+crypto @@ -19,7 +20,7 @@ */ ENTRY(ce_aes_ccm_auth_data) ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v0.16b}, [x0] /* load mac */ cbz w8, 1f sub w8, w8, #16 eor v1.16b, v1.16b, v1.16b @@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.2d}, [x4] /* load first round key */ +1: ld1 {v3.16b}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? */ add x6, x4, #16 @@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.2d}, [x6], #16 /* load next round key */ +4: ld1 {v3.16b}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.2d}, [x6], #16 /* load next round key */ +5: ld1 {v4.16b}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.2d}, [x6], #16 /* load next round key */ + ld1 {v5.16b}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v1.16b}, [x1], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ bne 1b -6: st1 {v0.2d}, [x0] /* store mac */ +6: st1 {v0.16b}, [x0] /* store mac */ beq 10f adds w2, w2, #16 beq 10f @@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.2d}, [x0] + st1 {v0.16b}, [x0] 10: str w8, [x3] ret ENDPROC(ce_aes_ccm_auth_data) @@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.2d}, [x2], #16 /* load first round key */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? 
*/ sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.2d}, [x1] /* load 1st ctriv */ + ld1 {v1.16b}, [x1] /* load 1st ctriv */ bmi 0f bne 3f mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.2d}, [x2], #16 /* load next round key */ +1: ld1 {v5.16b}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.2d}, [x2], #16 /* load next round key */ +2: ld1 {v3.16b}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.2d}, [x2], #16 /* load next round key */ +3: ld1 {v4.16b}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) aese v1.16b, v4.16b /* final round key cancels out */ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.2d}, [x0] /* store result */ + st1 {v0.16b}, [x0] /* store result */ ret ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.2d}, [x5] /* load mac */ - rev x8, x8 /* keep swabbed ctr in reg */ + ld1 {v0.16b}, [x5] /* load mac */ +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.1d}, [x6] /* load upper ctr */ + ld1 {v1.8b}, [x6] /* load upper ctr */ prfm pldl1strm, [x1] add x8, x8, #1 rev x9, x8 cmp w4, #12 /* which key size? */ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.2d}, [x3] /* load first round key */ + ld1 {v3.16b}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.2d}, [x10], #16 /* load next round key */ +3: ld1 {v3.16b}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.2d}, [x10], #16 /* load next round key */ +4: ld1 {v4.16b}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.2d}, [x10], #16 /* load next round key */ + ld1 {v5.16b}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b @@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ st1 {v1.16b}, [x0], #16 /* write output block */ bne 0b - rev x8, x8 - st1 {v0.2d}, [x5] /* store mac */ +CPU_LE( rev x8, x8 ) + st1 {v0.16b}, [x5] /* store mac */ str x8, [x6, #8] /* store lsb end of ctr (BE) */ 5: ret 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.2d}, [x5] /* store mac */ + st1 {v0.16b}, [x5] /* store mac */ add w2, w2, #16 /* process partial tail block */ 7: ldrb w9, [x1], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index f4bf2f2..cc5515d 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -11,9 +11,9 @@ #include <asm/neon.h> #include <asm/unaligned.h> #include <crypto/aes.h> -#include <crypto/algapi.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> #include <linux/module.h> 
#include "aes-ce-setkey.h" @@ -149,12 +149,7 @@ static int ccm_encrypt(struct aead_request *req) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); - struct blkcipher_desc desc = { .info = req->iv }; - struct blkcipher_walk walk; - struct scatterlist srcbuf[2]; - struct scatterlist dstbuf[2]; - struct scatterlist *src; - struct scatterlist *dst; + struct skcipher_walk walk; u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen; @@ -172,27 +167,19 @@ static int ccm_encrypt(struct aead_request *req) /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); - src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen); - dst = src; - if (req->src != req->dst) - dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen); - - blkcipher_walk_init(&walk, dst, src, len); - err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, - AES_BLOCK_SIZE); + err = skcipher_walk_aead_encrypt(&walk, req, true); while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; - if (walk.nbytes == len) + if (walk.nbytes == walk.total) tail = 0; ce_aes_ccm_encrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - len -= walk.nbytes - tail; - err = blkcipher_walk_done(&desc, &walk, tail); + err = skcipher_walk_done(&walk, tail); } if (!err) ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); @@ -203,7 +190,7 @@ static int ccm_encrypt(struct aead_request *req) return err; /* copy authtag to end of dst */ - scatterwalk_map_and_copy(mac, dst, req->cryptlen, + scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, crypto_aead_authsize(aead), 1); return 0; @@ -214,12 +201,7 @@ static int ccm_decrypt(struct aead_request *req) struct crypto_aead *aead = crypto_aead_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead); unsigned int authsize = crypto_aead_authsize(aead); - struct blkcipher_desc desc = { .info = req->iv }; - struct blkcipher_walk walk; - struct scatterlist srcbuf[2]; - struct scatterlist dstbuf[2]; - struct scatterlist *src; - struct scatterlist *dst; + struct skcipher_walk walk; u8 __aligned(8) mac[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u32 len = req->cryptlen - authsize; @@ -237,27 +219,19 @@ static int ccm_decrypt(struct aead_request *req) /* preserve the original iv for the final round */ memcpy(buf, req->iv, AES_BLOCK_SIZE); - src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen); - dst = src; - if (req->src != req->dst) - dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen); - - blkcipher_walk_init(&walk, dst, src, len); - err = blkcipher_aead_walk_virt_block(&desc, &walk, aead, - AES_BLOCK_SIZE); + err = skcipher_walk_aead_decrypt(&walk, req, true); while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; - if (walk.nbytes == len) + if (walk.nbytes == walk.total) tail = 0; ce_aes_ccm_decrypt(walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes - tail, ctx->key_enc, num_rounds(ctx), mac, walk.iv); - len -= walk.nbytes - tail; - err = blkcipher_walk_done(&desc, &walk, tail); + err = skcipher_walk_done(&walk, tail); } if (!err) ce_aes_ccm_final(mac, buf, ctx->key_enc, num_rounds(ctx)); @@ -268,7 +242,8 @@ static int ccm_decrypt(struct aead_request *req) return err; /* compare calculated auth tag with the stored one */ - scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize, + scatterwalk_map_and_copy(buf, req->src, + req->assoclen + req->cryptlen - authsize, authsize, 0); if 
(crypto_memneq(mac, buf, authsize)) @@ -287,6 +262,7 @@ static struct aead_alg ccm_aes_alg = { .cra_module = THIS_MODULE, }, .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, .maxauthsize = AES_BLOCK_SIZE, .setkey = ccm_setkey, .setauthsize = ccm_setauthsize, diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-cipher.c index f7bd9bf..50d9fe1 100644 --- a/arch/arm64/crypto/aes-ce-cipher.c +++ b/arch/arm64/crypto/aes-ce-cipher.c @@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" " aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; +#else + rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ + rki[0]; +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 78f3cfe..b46093d 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #define AES_ENTRY(func) ENTRY(ce_ ## func) #define AES_ENDPROC(func) ENDPROC(ce_ ## func) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 6b2aa0f..4e3f8ad 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -11,8 +11,8 @@ #include <asm/neon.h> #include <asm/hwcap.h> #include <crypto/aes.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> #include <linux/module.h> #include <linux/cpufeature.h> #include <crypto/xts.h> @@ -80,13 +80,19 @@ struct crypto_aes_xts_ctx { struct crypto_aes_ctx __aligned(8) key2; }; 
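Note (illustrative): the aes-glue.c hunks below convert every mode handler from the legacy blkcipher interface to the new skcipher request API. Condensed from the converted code itself, each handler ends up with the shape sketched here; aes_mode_op() is a stand-in for the per-mode assembly helpers (aes_ecb_encrypt() and friends), not a real symbol:

	static int mode_crypt(struct skcipher_request *req)
	{
		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
		struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
		int err, first, rounds = 6 + ctx->key_length / 4;
		struct skcipher_walk walk;
		unsigned int blocks;

		/* map the request's src/dst scatterlists for virtual access */
		err = skcipher_walk_virt(&walk, req, true);

		kernel_neon_begin();
		for (first = 1; (blocks = walk.nbytes / AES_BLOCK_SIZE); first = 0) {
			aes_mode_op(walk.dst.virt.addr, walk.src.virt.addr,
				    (u8 *)ctx->key_enc, rounds, blocks, first);
			/* hand back any partial tail block and advance the walk */
			err = skcipher_walk_done(&walk,
						 walk.nbytes % AES_BLOCK_SIZE);
		}
		kernel_neon_end();
		return err;
	}

The old desc->flags and blkcipher_walk_init() boilerplate disappears: the walk is initialised directly from the skcipher_request, and the scatterlist handling stays inside the walk helpers.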
-static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, +static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len); +} + +static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int ret; - ret = xts_check_key(tfm, in_key, key_len); + ret = xts_verify_key(tfm, in_key, key_len); if (ret) return ret; @@ -97,111 +103,101 @@ static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key, if (!ret) return 0; - tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, 
true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, true); first = 1; kernel_neon_begin(); @@ -209,17 +205,14 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); first = 0; - nbytes -= blocks * AES_BLOCK_SIZE; - if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE) - break; - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % AES_BLOCK_SIZE); } - if (walk.nbytes % AES_BLOCK_SIZE) { - u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + if (walk.nbytes) { u8 __aligned(8) tail[AES_BLOCK_SIZE]; + unsigned int nbytes = walk.nbytes; + u8 *tdst = walk.dst.virt.addr; + u8 *tsrc = walk.src.virt.addr; /* * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need @@ -230,227 +223,169 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); memcpy(tdst, tail, nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_neon_end(); return err; } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = 
crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err, first, rounds = 6 + ctx->key1.key_length / 4; - struct blkcipher_walk walk; + struct skcipher_walk walk; unsigned int blocks; - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, true); kernel_neon_begin(); for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; } -static struct crypto_alg aes_algs[] = { { - .cra_name = "__ecb-aes-" MODE, - .cra_driver_name = "__driver-ecb-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = aes_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, +static struct skcipher_alg aes_algs[] = { { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, }, { - .cra_name = "__cbc-aes-" MODE, - .cra_driver_name = "__driver-cbc-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, + .base = 
{ + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, }, { - .cra_name = "__ctr-aes-" MODE, - .cra_driver_name = "__driver-ctr-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_setkey, - .encrypt = ctr_encrypt, - .decrypt = ctr_encrypt, + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + .encrypt = ctr_encrypt, + .decrypt = ctr_encrypt, }, { - .cra_name = "__xts-aes-" MODE, - .cra_driver_name = "__driver-xts-aes-" MODE, - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_set_key, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-" MODE, + .cra_priority = PRIO, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx), + .cra_alignmask = 7, + .cra_module = THIS_MODULE, }, -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = 0, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = 
CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-" MODE, - .cra_priority = PRIO, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 7, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - } + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_set_key, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, } }; -static int __init aes_init(void) +static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; + +static void aes_exit(void) { - return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs)); + int i; + + for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++) + simd_skcipher_free(aes_simd_algs[i]); + + crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); } -static void __exit aes_exit(void) +static int __init aes_init(void) { - crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs)); + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; + int err; + int i; + + err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + algname = aes_algs[i].base.cra_name + 2; + drvname = aes_algs[i].base.cra_driver_name + 2; + basename = aes_algs[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aes_simd_algs[i] = simd; + } + + return 0; + +unregister_simds: + aes_exit(); + return err; } #ifdef USE_V8_CRYPTO_EXTENSIONS diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index f6e372c..c53dbea 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -386,7 +386,8 @@ AES_ENDPROC(aes_ctr_encrypt) .endm .Lxts_mul_x: - .word 1, 0, 0x87, 0 +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) FRAME_PUSH diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S index b93170e..85f07ea 100644 --- a/arch/arm64/crypto/aes-neon.S +++ b/arch/arm64/crypto/aes-neon.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #define AES_ENTRY(func) ENTRY(neon_ ## func) #define AES_ENDPROC(func) ENDPROC(neon_ ## func) @@ -83,13 +84,13 @@ .endm .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -229,7 +230,7 @@ .endm .macro do_block_2x, enc, in0, 
in1 rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -237,7 +238,7 @@ sub_bytes_2x \in0, \in1 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -254,7 +255,7 @@ .endm .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -266,7 +267,7 @@ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -306,12 +307,16 @@ .text .align 4 .LForward_ShiftRows: - .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 - .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb +CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) +CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) +CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) +CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) .LReverse_ShiftRows: - .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb - .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 +CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) +CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) +CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) +CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S new file mode 100644 index 0000000..18f5a84 --- /dev/null +++ b/arch/arm64/crypto/crc32-ce-core.S @@ -0,0 +1,266 @@ +/* + * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. 
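Note (illustrative): the PMULL-folded routines in this file are exposed through the "crc32" and "crc32c" shash drivers registered by crc32-ce-glue.c further below. A minimal consumer sketch against the synchronous hash API of this kernel generation; crc32_demo() and its arguments are hypothetical, and the 4-byte little-endian seed passed via setkey matches what the glue code expects:

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/kernel.h>

	static int crc32_demo(const void *data, unsigned int len, u8 out[4])
	{
		struct crypto_shash *tfm;
		__le32 seed = 0;	/* initial CRC value, little-endian */
		int err;

		tfm = crypto_alloc_shash("crc32", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_shash_setkey(tfm, (const u8 *)&seed, sizeof(seed));
		if (!err) {
			SHASH_DESC_ON_STACK(desc, tfm);

			desc->tfm = tfm;
			desc->flags = 0;
			err = crypto_shash_digest(desc, data, len, out);
		}

		crypto_free_shash(tfm);
		return err;	/* out[] holds the CRC as little-endian bytes */
	}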
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> + * Alexander Boyko <Alexander_Boyko@xyratex.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .align 6 + .cpu generic+crypto+crc + +.Lcrc32_constants: + /* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ + .octa 0x00000001c6e415960000000154442bd4 + + /* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ + .octa 0x00000000ccaa009e00000001751997d0 + + /* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ + .quad 0x0000000163cd6124 + .quad 0x00000000FFFFFFFF + + /* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` + * = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ + .octa 0x00000001F701164100000001DB710641 + +.Lcrc32c_constants: + .octa 0x000000009e4addf800000000740eef02 + .octa 0x000000014cd00bd600000000f20c0dfe + .quad 0x00000000dd45aab8 + .quad 0x00000000FFFFFFFF + .octa 0x00000000dea713f10000000105ec76f0 + + vCONSTANT .req v0 + dCONSTANT .req d0 + qCONSTANT .req q0 + + BUF .req x0 + LEN .req x1 + CRC .req x2 + + vzr .req v9 + + /** + * Calculate crc32 + * BUF - buffer + * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pmull_le(unsigned char const *buffer, + * size_t len, uint crc32) + */ +ENTRY(crc32_pmull_le) + adr x3, .Lcrc32_constants + b 0f + +ENTRY(crc32c_pmull_le) + adr x3, .Lcrc32c_constants + +0: bic LEN, LEN, #15 + ld1 {v1.16b-v4.16b}, [BUF], #0x40 + movi vzr.16b, #0 + fmov dCONSTANT, CRC + eor v1.16b, v1.16b, vCONSTANT.16b + sub LEN, LEN, #0x40 + cmp LEN, #0x40 + b.lt less_64 + + ldr qCONSTANT, [x3] + +loop_64: /* 64 bytes Full cache line folding */ + sub LEN, LEN, #0x40 + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull2 v6.1q, v2.2d, vCONSTANT.2d + pmull2 v7.1q, v3.2d, vCONSTANT.2d + pmull2 v8.1q, v4.2d, vCONSTANT.2d + + pmull v1.1q, v1.1d, vCONSTANT.1d + pmull v2.1q, v2.1d, vCONSTANT.1d + pmull v3.1q, v3.1d, vCONSTANT.1d + pmull v4.1q, v4.1d, vCONSTANT.1d + + eor v1.16b, v1.16b, v5.16b + ld1 {v5.16b}, [BUF], #0x10 + eor v2.16b, v2.16b, v6.16b + ld1 {v6.16b}, [BUF], #0x10 + eor v3.16b, v3.16b, v7.16b + ld1 {v7.16b}, [BUF], #0x10 + eor v4.16b, v4.16b, v8.16b + ld1 {v8.16b}, [BUF], #0x10 + + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b + eor v4.16b, v4.16b, v8.16b + + cmp LEN, #0x40 + b.ge loop_64 + +less_64: /* Folding cache line into 128bit */ + ldr qCONSTANT, [x3, #16] + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v2.16b + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v3.16b + + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v4.16b + + cbz LEN, fold_64 + +loop_16: 
/* Folding rest buffer into 128bit */ + subs LEN, LEN, #0x10 + + ld1 {v2.16b}, [BUF], #0x10 + pmull2 v5.1q, v1.2d, vCONSTANT.2d + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v5.16b + eor v1.16b, v1.16b, v2.16b + + b.ne loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + ext v2.16b, v1.16b, v1.16b, #8 + pmull2 v2.1q, v2.2d, vCONSTANT.2d + ext v1.16b, v1.16b, vzr.16b, #8 + eor v1.16b, v1.16b, v2.16b + + /* final 32-bit fold */ + ldr dCONSTANT, [x3, #32] + ldr d3, [x3, #40] + + ext v2.16b, v1.16b, vzr.16b, #4 + and v1.16b, v1.16b, v3.16b + pmull v1.1q, v1.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v2.16b + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ + ldr qCONSTANT, [x3, #48] + + and v2.16b, v1.16b, v3.16b + ext v2.16b, vzr.16b, v2.16b, #8 + pmull2 v2.1q, v2.2d, vCONSTANT.2d + and v2.16b, v2.16b, v3.16b + pmull v2.1q, v2.1d, vCONSTANT.1d + eor v1.16b, v1.16b, v2.16b + mov w0, v1.s[1] + + ret +ENDPROC(crc32_pmull_le) +ENDPROC(crc32c_pmull_le) + + .macro __crc32, c +0: subs x2, x2, #16 + b.mi 8f + ldp x3, x4, [x1], #16 +CPU_BE( rev x3, x3 ) +CPU_BE( rev x4, x4 ) + crc32\c\()x w0, w0, x3 + crc32\c\()x w0, w0, x4 + b.ne 0b + ret + +8: tbz x2, #3, 4f + ldr x3, [x1], #8 +CPU_BE( rev x3, x3 ) + crc32\c\()x w0, w0, x3 +4: tbz x2, #2, 2f + ldr w3, [x1], #4 +CPU_BE( rev w3, w3 ) + crc32\c\()w w0, w0, w3 +2: tbz x2, #1, 1f + ldrh w3, [x1], #2 +CPU_BE( rev16 w3, w3 ) + crc32\c\()h w0, w0, w3 +1: tbz x2, #0, 0f + ldrb w3, [x1] + crc32\c\()b w0, w0, w3 +0: ret + .endm + + .align 5 +ENTRY(crc32_armv8_le) + __crc32 +ENDPROC(crc32_armv8_le) + + .align 5 +ENTRY(crc32c_armv8_le) + __crc32 c +ENDPROC(crc32c_armv8_le) diff --git a/arch/arm64/crypto/crc32-ce-glue.c b/arch/arm64/crypto/crc32-ce-glue.c new file mode 100644 index 0000000..8594127 --- /dev/null +++ b/arch/arm64/crypto/crc32-ce-glue.c @@ -0,0 +1,212 @@ +/* + * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/cpufeature.h> +#include <linux/crc32.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/hash.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/unaligned.h> + +#define PMULL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pmull_le_16 */ +#define SCALE_F 16L /* size of NEON register */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len); + +static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len); +static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len); + +static int crc32_pmull_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + return 0; +} + +static int crc32c_pmull_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = ~0; + return 0; +} + +static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pmull_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crc = shash_desc_ctx(desc); + + *crc = *mctx; + return 0; +} + +static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if ((u64)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F)); + + *crc = fallback_crc32(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin_partial(10); + *crc = crc32_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + + if (length > 0) + *crc = fallback_crc32(*crc, data, length); + + return 0; +} + +static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u32 *crc = shash_desc_ctx(desc); + unsigned int l; + + if ((u64)data % SCALE_F) { + l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F)); + + *crc = fallback_crc32c(*crc, data, l); + + data += l; + length -= l; + } + + if (length >= PMULL_MIN_LEN) { + l = round_down(length, SCALE_F); + + kernel_neon_begin_partial(10); + *crc = crc32c_pmull_le(data, l, *crc); + kernel_neon_end(); + + data += l; + length -= l; + } + + if (length > 0) { + *crc = fallback_crc32c(*crc, data, length); + } + + return 0; +} + +static int crc32_pmull_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(*crc, out); + return 0; +} + +static int crc32c_pmull_final(struct shash_desc *desc, u8 *out) +{ + u32 *crc = shash_desc_ctx(desc); + + put_unaligned_le32(~*crc, out); + return 0; +} + +static struct shash_alg crc32_pmull_algs[] = { { + .setkey = crc32_pmull_setkey, + .init = crc32_pmull_init, + .update = crc32_pmull_update, + .final = crc32_pmull_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32_pmull_cra_init, + .base.cra_name = "crc32", + .base.cra_driver_name = "crc32-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + 
.base.cra_module = THIS_MODULE, +}, { + .setkey = crc32_pmull_setkey, + .init = crc32_pmull_init, + .update = crc32c_pmull_update, + .final = crc32c_pmull_final, + .descsize = sizeof(u32), + .digestsize = sizeof(u32), + + .base.cra_ctxsize = sizeof(u32), + .base.cra_init = crc32c_pmull_cra_init, + .base.cra_name = "crc32c", + .base.cra_driver_name = "crc32c-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_module = THIS_MODULE, +} }; + +static int __init crc32_pmull_mod_init(void) +{ + if (elf_hwcap & HWCAP_CRC32) { + fallback_crc32 = crc32_armv8_le; + fallback_crc32c = crc32c_armv8_le; + } else { + fallback_crc32 = crc32_le; + fallback_crc32c = __crc32c_le; + } + + return crypto_register_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +static void __exit crc32_pmull_mod_exit(void) +{ + crypto_unregister_shashes(crc32_pmull_algs, + ARRAY_SIZE(crc32_pmull_algs)); +} + +module_cpu_feature_match(PMULL, crc32_pmull_mod_init); +module_exit(crc32_pmull_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S new file mode 100644 index 0000000..d5b5a8c --- /dev/null +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -0,0 +1,392 @@ +// +// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions +// +// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License version 2 as +// published by the Free Software Foundation. +// + +// +// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions +// +// Copyright (c) 2013, Intel Corporation +// +// Authors: +// Erdinc Ozturk <erdinc.ozturk@intel.com> +// Vinodh Gopal <vinodh.gopal@intel.com> +// James Guilford <james.guilford@intel.com> +// Tim Chen <tim.c.chen@linux.intel.com> +// +// This software is available to you under a choice of one of two +// licenses. You may choose to be licensed under the terms of the GNU +// General Public License (GPL) Version 2, available from the file +// COPYING in the main directory of this source tree, or the +// OpenIB.org BSD license below: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the +// distribution. +// +// * Neither the name of the Intel Corporation nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// +// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
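For the crc32-arm64-ce glue driver registered above, callers never invoke crc32_pmull_update() directly; they go through the generic shash interface, and the crypto core picks whichever "crc32" implementation has the highest cra_priority. A hedged sketch of such a caller (assumed example code, not part of this series; crc32_shash_example is a made-up name, and the 32-bit "key" is the CRC seed, interpreted little endian by the setkey routine shown above):

#include <crypto/hash.h>
#include <linux/err.h>

static int crc32_shash_example(const u8 *data, unsigned int len, u32 *out)
{
	struct crypto_shash *tfm;
	u32 seed = 0;
	int err;

	tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* the "key" of the crc32 shash is the starting CRC value */
	err = crypto_shash_setkey(tfm, (u8 *)&seed, sizeof(seed));
	if (!err) {
		SHASH_DESC_ON_STACK(desc, tfm);

		desc->tfm = tfm;
		desc->flags = 0;
		/* the driver stores the digest little endian */
		err = crypto_shash_digest(desc, data, len, (u8 *)out);
	}
	crypto_free_shash(tfm);
	return err;
}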
IN NO EVENT SHALL INTEL CORPORATION OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Function API: +// UINT16 crc_t10dif_pcl( +// UINT16 init_crc, //initial CRC value, 16 bits +// const unsigned char *buf, //buffer pointer to calculate CRC on +// UINT64 len //buffer length in bytes (64-bit data) +// ); +// +// Reference paper titled "Fast CRC Computation for Generic +// Polynomials Using PCLMULQDQ Instruction" +// URL: http://www.intel.com/content/dam/www/public/us/en/documents +// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf +// +// + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + .cpu generic+crypto + + arg1_low32 .req w0 + arg2 .req x1 + arg3 .req x2 + + vzr .req v13 + +ENTRY(crc_t10dif_pmull) + movi vzr.16b, #0 // init zero register + + // adjust the 16-bit initial_crc value, scale it to 32 bits + lsl arg1_low32, arg1_low32, #16 + + // check if smaller than 256 + cmp arg3, #256 + + // for sizes less than 128, we can't fold 64B at a time... + b.lt _less_than_128 + + // load the initial crc value + // crc value does not need to be byte-reflected, but it needs + // to be moved to the high part of the register. + // because data will be byte-reflected and will align with + // initial crc at correct place. + movi v10.16b, #0 + mov v10.s[3], arg1_low32 // initial crc + + // receive the initial 64B data, xor the initial crc value + ldp q0, q1, [arg2] + ldp q2, q3, [arg2, #0x20] + ldp q4, q5, [arg2, #0x40] + ldp q6, q7, [arg2, #0x60] + add arg2, arg2, #0x80 + +CPU_LE( rev64 v0.16b, v0.16b ) +CPU_LE( rev64 v1.16b, v1.16b ) +CPU_LE( rev64 v2.16b, v2.16b ) +CPU_LE( rev64 v3.16b, v3.16b ) +CPU_LE( rev64 v4.16b, v4.16b ) +CPU_LE( rev64 v5.16b, v5.16b ) +CPU_LE( rev64 v6.16b, v6.16b ) +CPU_LE( rev64 v7.16b, v7.16b ) + +CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) +CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) +CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 ) +CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 ) +CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 ) +CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 ) +CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 ) +CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + + // XOR the initial_crc value + eor v0.16b, v0.16b, v10.16b + + ldr q10, rk3 // xmm10 has rk3 and rk4 + // type of pmull instruction + // will determine which constant to use + + // + // we subtract 256 instead of 128 to save one instruction from the loop + // + sub arg3, arg3, #256 + + // at this section of the code, there is 64*x+y (0<=y<64) bytes of + // buffer. The _fold_64_B_loop will fold 64B at a time + // until we have 64+y Bytes of buffer + + + // fold 64B at a time. 
This section of the code folds 4 vector + // registers in parallel +_fold_64_B_loop: + + .macro fold64, reg1, reg2 + ldp q11, q12, [arg2], #0x20 + + pmull2 v8.1q, \reg1\().2d, v10.2d + pmull \reg1\().1q, \reg1\().1d, v10.1d + +CPU_LE( rev64 v11.16b, v11.16b ) +CPU_LE( rev64 v12.16b, v12.16b ) + + pmull2 v9.1q, \reg2\().2d, v10.2d + pmull \reg2\().1q, \reg2\().1d, v10.1d + +CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 ) +CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) + + eor \reg1\().16b, \reg1\().16b, v8.16b + eor \reg2\().16b, \reg2\().16b, v9.16b + eor \reg1\().16b, \reg1\().16b, v11.16b + eor \reg2\().16b, \reg2\().16b, v12.16b + .endm + + fold64 v0, v1 + fold64 v2, v3 + fold64 v4, v5 + fold64 v6, v7 + + subs arg3, arg3, #128 + + // check if there is another 64B in the buffer to be able to fold + b.ge _fold_64_B_loop + + // at this point, the buffer pointer is pointing at the last y Bytes + // of the buffer the 64B of folded data is in 4 of the vector + // registers: v0, v1, v2, v3 + + // fold the 8 vector registers to 1 vector register with different + // constants + + ldr q10, rk9 + + .macro fold16, reg, rk + pmull v8.1q, \reg\().1d, v10.1d + pmull2 \reg\().1q, \reg\().2d, v10.2d + .ifnb \rk + ldr q10, \rk + .endif + eor v7.16b, v7.16b, v8.16b + eor v7.16b, v7.16b, \reg\().16b + .endm + + fold16 v0, rk11 + fold16 v1, rk13 + fold16 v2, rk15 + fold16 v3, rk17 + fold16 v4, rk19 + fold16 v5, rk1 + fold16 v6 + + // instead of 64, we add 48 to the loop counter to save 1 instruction + // from the loop instead of a cmp instruction, we use the negative + // flag with the jl instruction + adds arg3, arg3, #(128-16) + b.lt _final_reduction_for_128 + + // now we have 16+y bytes left to reduce. 16 Bytes is in register v7 + // and the rest is in memory. We can fold 16 bytes at a time if y>=16 + // continue folding 16B at a time + +_16B_reduction_loop: + pmull v8.1q, v7.1d, v10.1d + pmull2 v7.1q, v7.2d, v10.2d + eor v7.16b, v7.16b, v8.16b + + ldr q0, [arg2], #16 +CPU_LE( rev64 v0.16b, v0.16b ) +CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) + eor v7.16b, v7.16b, v0.16b + subs arg3, arg3, #16 + + // instead of a cmp instruction, we utilize the flags with the + // jge instruction equivalent of: cmp arg3, 16-16 + // check if there is any more 16B in the buffer to be able to fold + b.ge _16B_reduction_loop + + // now we have 16+z bytes left to reduce, where 0<= z < 16. + // first, we reduce the data in the xmm7 register + +_final_reduction_for_128: + // check if any more data to fold. If not, compute the CRC of + // the final 128 bits + adds arg3, arg3, #16 + b.eq _128_done + + // here we are getting data that is less than 16 bytes. + // since we know that there was data before the pointer, we can + // offset the input pointer before the actual point, to receive + // exactly 16 bytes. after that the registers need to be adjusted. 
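Every fold64/fold16 step above is built from the 64x64-bit pmull/pmull2 carry-less multiplications. As a plain-C illustration of that primitive (userspace sketch only, not part of the patch; clmul64 is a made-up helper name):

#include <stdint.h>
#include <stdio.h>

/* 64 x 64 -> 128 bit carry-less multiply over GF(2), i.e. what a single
 * pmull instruction computes on one pair of 64-bit lanes. */
static __uint128_t clmul64(uint64_t a, uint64_t b)
{
	__uint128_t acc = 0;
	int i;

	for (i = 0; i < 64; i++)
		if ((b >> i) & 1)
			acc ^= (__uint128_t)a << i;
	return acc;
}

int main(void)
{
	/* (x^3 + x + 1) * (x + 1) = x^4 + x^3 + x^2 + 1, no carries */
	__uint128_t r = clmul64(0xb, 0x3);

	printf("0x%llx\n", (unsigned long long)r);	/* prints 0x1d */
	return 0;
}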
+_get_last_two_regs: + add arg2, arg2, arg3 + ldr q1, [arg2, #-16] +CPU_LE( rev64 v1.16b, v1.16b ) +CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 ) + + // get rid of the extra data that was loaded before + // load the shift constant + adr x4, tbl_shf_table + 16 + sub x4, x4, arg3 + ld1 {v0.16b}, [x4] + + // shift v2 to the left by arg3 bytes + tbl v2.16b, {v7.16b}, v0.16b + + // shift v7 to the right by 16-arg3 bytes + movi v9.16b, #0x80 + eor v0.16b, v0.16b, v9.16b + tbl v7.16b, {v7.16b}, v0.16b + + // blend + sshr v0.16b, v0.16b, #7 // convert to 8-bit mask + bsl v0.16b, v2.16b, v1.16b + + // fold 16 Bytes + pmull v8.1q, v7.1d, v10.1d + pmull2 v7.1q, v7.2d, v10.2d + eor v7.16b, v7.16b, v8.16b + eor v7.16b, v7.16b, v0.16b + +_128_done: + // compute crc of a 128-bit value + ldr q10, rk5 // rk5 and rk6 in xmm10 + + // 64b fold + ext v0.16b, vzr.16b, v7.16b, #8 + mov v7.d[0], v7.d[1] + pmull v7.1q, v7.1d, v10.1d + eor v7.16b, v7.16b, v0.16b + + // 32b fold + ext v0.16b, v7.16b, vzr.16b, #4 + mov v7.s[3], vzr.s[0] + pmull2 v0.1q, v0.2d, v10.2d + eor v7.16b, v7.16b, v0.16b + + // barrett reduction +_barrett: + ldr q10, rk7 + mov v0.d[0], v7.d[1] + + pmull v0.1q, v0.1d, v10.1d + ext v0.16b, vzr.16b, v0.16b, #12 + pmull2 v0.1q, v0.2d, v10.2d + ext v0.16b, vzr.16b, v0.16b, #12 + eor v7.16b, v7.16b, v0.16b + mov w0, v7.s[1] + +_cleanup: + // scale the result back to 16 bits + lsr x0, x0, #16 + ret + +_less_than_128: + cbz arg3, _cleanup + + movi v0.16b, #0 + mov v0.s[3], arg1_low32 // get the initial crc value + + ldr q7, [arg2], #0x10 +CPU_LE( rev64 v7.16b, v7.16b ) +CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) + eor v7.16b, v7.16b, v0.16b // xor the initial crc value + + cmp arg3, #16 + b.eq _128_done // exactly 16 left + b.lt _less_than_16_left + + ldr q10, rk1 // rk1 and rk2 in xmm10 + + // update the counter. 
subtract 32 instead of 16 to save one + // instruction from the loop + subs arg3, arg3, #32 + b.ge _16B_reduction_loop + + add arg3, arg3, #16 + b _get_last_two_regs + +_less_than_16_left: + // shl r9, 4 + adr x0, tbl_shf_table + 16 + sub x0, x0, arg3 + ld1 {v0.16b}, [x0] + movi v9.16b, #0x80 + eor v0.16b, v0.16b, v9.16b + tbl v7.16b, {v7.16b}, v0.16b + b _128_done +ENDPROC(crc_t10dif_pmull) + +// precomputed constants +// these constants are precomputed from the poly: +// 0x8bb70000 (0x8bb7 scaled to 32 bits) + .align 4 +// Q = 0x18BB70000 +// rk1 = 2^(32*3) mod Q << 32 +// rk2 = 2^(32*5) mod Q << 32 +// rk3 = 2^(32*15) mod Q << 32 +// rk4 = 2^(32*17) mod Q << 32 +// rk5 = 2^(32*3) mod Q << 32 +// rk6 = 2^(32*2) mod Q << 32 +// rk7 = floor(2^64/Q) +// rk8 = Q + +rk1: .octa 0x06df0000000000002d56000000000000 +rk3: .octa 0x7cf50000000000009d9d000000000000 +rk5: .octa 0x13680000000000002d56000000000000 +rk7: .octa 0x000000018bb7000000000001f65a57f8 +rk9: .octa 0xbfd6000000000000ceae000000000000 +rk11: .octa 0x713c0000000000001e16000000000000 +rk13: .octa 0x80a6000000000000f7f9000000000000 +rk15: .octa 0xe658000000000000044c000000000000 +rk17: .octa 0xa497000000000000ad18000000000000 +rk19: .octa 0xe7b50000000000006ee3000000000000 + +tbl_shf_table: +// use these values for shift constants for the tbl/tbx instruction +// different alignments result in values as shown: +// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1 +// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2 +// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3 +// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4 +// DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5 +// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6 +// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7 +// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8 +// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9 +// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10 +// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11 +// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12 +// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13 +// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14 +// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15 + + .byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 + .byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0 diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c new file mode 100644 index 0000000..60cb590 --- /dev/null +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -0,0 +1,95 @@ +/* + * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions + * + * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
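The rk1..rk8 values listed above follow from the formulas in the comments, with Q = 0x18BB70000 (0x8bb7 scaled to 32 bits). A small userspace generator, under the assumption that "2^n mod Q" means x^n reduced modulo Q over GF(2) (illustrative sketch only, not part of the patch; xn_mod_q and x64_div_q are made-up helpers):

#include <stdint.h>
#include <stdio.h>

#define Q 0x18BB70000ULL		/* 0x8bb7 scaled to 32 bits */

/* x^n mod Q over GF(2): shift in one factor of x at a time and reduce */
static uint64_t xn_mod_q(unsigned int n)
{
	uint64_t r = 1;

	while (n--) {
		r <<= 1;
		if (r & (1ULL << 32))
			r ^= Q;
	}
	return r;
}

/* floor(x^64 / Q) over GF(2): schoolbook long division, quotient only */
static uint64_t x64_div_q(void)
{
	uint64_t rem = 0, quot = 0;
	int i;

	for (i = 64; i >= 0; i--) {
		rem = (rem << 1) | (i == 64);	/* dividend x^64 has one bit */
		if (rem & (1ULL << 32)) {
			rem ^= Q;
			quot |= 1ULL << i;
		}
	}
	return quot;
}

int main(void)
{
	/* compare against the rk table above, e.g. rk1 = 2d56000000000000,
	 * rk6 = 1368000000000000, rk7 = 00000001f65a57f8 */
	printf("rk1 %016llx\n", (unsigned long long)(xn_mod_q(32 * 3) << 32));
	printf("rk2 %016llx\n", (unsigned long long)(xn_mod_q(32 * 5) << 32));
	printf("rk6 %016llx\n", (unsigned long long)(xn_mod_q(32 * 2) << 32));
	printf("rk7 %016llx\n", (unsigned long long)x64_div_q());
	return 0;
}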
+ */ + +#include <linux/cpufeature.h> +#include <linux/crc-t10dif.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/string.h> + +#include <crypto/internal/hash.h> + +#include <asm/neon.h> + +#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U + +asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len); + +static int crct10dif_init(struct shash_desc *desc) +{ + u16 *crc = shash_desc_ctx(desc); + + *crc = 0; + return 0; +} + +static int crct10dif_update(struct shash_desc *desc, const u8 *data, + unsigned int length) +{ + u16 *crc = shash_desc_ctx(desc); + unsigned int l; + + if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) { + l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE - + ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)); + + *crc = crc_t10dif_generic(*crc, data, l); + + length -= l; + data += l; + } + + if (length > 0) { + kernel_neon_begin_partial(14); + *crc = crc_t10dif_pmull(*crc, data, length); + kernel_neon_end(); + } + + return 0; +} + +static int crct10dif_final(struct shash_desc *desc, u8 *out) +{ + u16 *crc = shash_desc_ctx(desc); + + *(u16 *)out = *crc; + return 0; +} + +static struct shash_alg crc_t10dif_alg = { + .digestsize = CRC_T10DIF_DIGEST_SIZE, + .init = crct10dif_init, + .update = crct10dif_update, + .final = crct10dif_final, + .descsize = CRC_T10DIF_DIGEST_SIZE, + + .base.cra_name = "crct10dif", + .base.cra_driver_name = "crct10dif-arm64-ce", + .base.cra_priority = 200, + .base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init crc_t10dif_mod_init(void) +{ + return crypto_register_shash(&crc_t10dif_alg); +} + +static void __exit crc_t10dif_mod_exit(void) +{ + crypto_unregister_shash(&crc_t10dif_alg); +} + +module_cpu_feature_match(PMULL, crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_exit); + +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index dc45701..f0bb9f0b 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -29,8 +29,8 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {SHASH.16b}, [x3] - ld1 {XL.16b}, [x1] + ld1 {SHASH.2d}, [x3] + ld1 {XL.2d}, [x1] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 shl MASK.2d, MASK.2d, #57 @@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b ) cbnz w0, 0b - st1 {XL.16b}, [x1] + st1 {XL.2d}, [x1] ret ENDPROC(pmull_ghash_update) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 033aae6..c98e7e8 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform) ld1r {k3.4s}, [x6] /* load state */ - ldr dga, [x0] + ld1 {dgav.4s}, [x0] ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ @@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) b 1b /* store new state */ -3: str dga, [x0] +3: st1 {dgav.4s}, [x0] str dgb, [x0, #16] ret ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 5df9d9d..01cfee0 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform) ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ldp dga, dgb, [x0] + ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] @@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b ) b 1b /* store new 
state */ -3: stp dga, dgb, [x0] +3: st1 {dgav.4s, dgbv.4s}, [x0] ret ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped new file mode 100644 index 0000000..3ce82cc --- /dev/null +++ b/arch/arm64/crypto/sha256-core.S_shipped @@ -0,0 +1,2061 @@ +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +// project. The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significanty faster +// and the gap is only 40-90%. +// +// October 2016. +// +// Originally it was reckoned that it makes no sense to implement NEON +// version of SHA256 for 64-bit processors. This is because performance +// improvement on most wide-spread Cortex-A5x processors was observed +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was +// observed that 32-bit NEON SHA256 performs significantly better than +// 64-bit scalar version on *some* of the more recent processors. As +// result 64-bit NEON version of SHA256 was added to provide best +// all-round performance. For example it executes ~30% faster on X-Gene +// and Mongoose. [For reference, NEON version of SHA512 is bound to +// deliver much less improvement, likely *negative* on Cortex-A5x. +// Which is why NEON support is limited to SHA256.] + +#ifndef __KERNEL__ +# include "arm_arch.h" +#endif + +.text + +.extern OPENSSL_armcap_P +.globl sha256_block_data_order +.type sha256_block_data_order,%function +.align 6 +sha256_block_data_order: +#ifndef __KERNEL__ +# ifdef __ILP32__ + ldrsw x16,.LOPENSSL_armcap_P +# else + ldr x16,.LOPENSSL_armcap_P +# endif + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + tst w16,#ARMV7_NEON + b.ne .Lneon_entry +#endif + stp x29,x30,[sp,#-128]! 
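The unrolled scalar rounds that follow all implement the same per-round update named in the inline comments (Ch, Maj, Sigma0, Sigma1, sigma0, sigma1). As a compact C reference to read them against (an illustrative userspace sketch, not part of the patch; ror32, sha256_round and sha256_sched are made-up names):

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, unsigned int n)
{
	return (x >> n) | (x << (32 - n));
}

/* one SHA-256 round: s[0..7] = a..h, k the round constant, w the message word */
void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint32_t e = s[4], f = s[5], g = s[6], h = s[7];
	uint32_t S1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);	/* Sigma1(e) */
	uint32_t ch = (e & f) ^ (~e & g);				/* Ch(e,f,g) */
	uint32_t t1 = h + S1 + ch + k + w;
	uint32_t S0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);	/* Sigma0(a) */
	uint32_t maj = (a & b) ^ (a & c) ^ (b & c);			/* Maj(a,b,c) */
	uint32_t t2 = S0 + maj;

	s[7] = g; s[6] = f; s[5] = e;
	s[4] = d + t1;		/* e = d + T1: the "d+=h" in the comments */
	s[3] = c; s[2] = b; s[1] = a;
	s[0] = t1 + t2;		/* a = T1 + T2: "h+=Sigma0(a)" etc. */
}

/* message schedule over a 16-word rolling window:
 * W[i] = W[i-16] + sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2]) */
uint32_t sha256_sched(const uint32_t w[16], int i)
{
	uint32_t x = w[(i - 15) & 15], y = w[(i - 2) & 15];
	uint32_t s0 = ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);	/* sigma0 */
	uint32_t s1 = ror32(y, 17) ^ ror32(y, 19) ^ (y >> 10);	/* sigma1 */

	return w[i & 15] + s0 + w[(i - 7) & 15] + s1;
}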
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,.LK256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // 
h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add 
w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add 
w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // 
sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 
+ add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + 
ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + 
eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha256_block_data_order,.-sha256_block_data_order + +.align 6 +.type .LK256,%object +.LK256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +.size .LK256,.-.LK256 +#ifndef __KERNEL__ +.align 3 +.LOPENSSL_armcap_P: +# ifdef __ILP32__ + .long OPENSSL_armcap_P-. +# else + .quad OPENSSL_armcap_P-. +# endif +#endif +.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#ifndef __KERNEL__ +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! 
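// Note (added for readers, not part of the patch): in the .Lv8_entry path
// below, the .inst words annotated with //sha256h, //sha256h2, //sha256su0
// and //sha256su1 are the ARMv8 Crypto Extensions SHA-256 instructions
// emitted as raw opcodes, presumably so the shipped file still assembles
// with toolchains whose assemblers lack those mnemonics; the trailing
// comment on each .inst line gives the intended mnemonic form.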
+ add x29,sp,#0 + + ld1 {v0.4s,v1.4s},[x0] + adr x3,.LK256 + +.Loop_hw: + ld1 {v4.16b-v7.16b},[x1],#64 + sub x2,x2,#1 + ld1 {v16.4s},[x3],#16 + rev32 v4.16b,v4.16b + rev32 v5.16b,v5.16b + rev32 v6.16b,v6.16b + rev32 v7.16b,v7.16b + orr v18.16b,v0.16b,v0.16b // offload + orr v19.16b,v1.16b,v1.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v6.4s + .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v7.4s + .inst 0x5e282887 //sha256su0 v7.16b,v4.16b + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + .inst 
0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b + ld1 {v17.4s},[x3],#16 + add v16.4s,v16.4s,v4.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + ld1 {v16.4s},[x3],#16 + add v17.4s,v17.4s,v5.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + ld1 {v17.4s},[x3] + add v16.4s,v16.4s,v6.4s + sub x3,x3,#64*4-16 // rewind + orr v2.16b,v0.16b,v0.16b + .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s + .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s + + add v17.4s,v17.4s,v7.4s + orr v2.16b,v0.16b,v0.16b + .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s + .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s + + add v0.4s,v0.4s,v18.4s + add v1.4s,v1.4s,v19.4s + + cbnz x2,.Loop_hw + + st1 {v0.4s,v1.4s},[x0] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +#endif +#ifdef __KERNEL__ +.globl sha256_block_neon +#endif +.type sha256_block_neon,%function +.align 4 +sha256_block_neon: +.Lneon_entry: + stp x29, x30, [sp, #-16]! + mov x29, sp + sub sp,sp,#16*4 + + adr x16,.LK256 + add x2,x1,x2,lsl#6 // len to point at the end of inp + + ld1 {v0.16b},[x1], #16 + ld1 {v1.16b},[x1], #16 + ld1 {v2.16b},[x1], #16 + ld1 {v3.16b},[x1], #16 + ld1 {v4.4s},[x16], #16 + ld1 {v5.4s},[x16], #16 + ld1 {v6.4s},[x16], #16 + ld1 {v7.4s},[x16], #16 + rev32 v0.16b,v0.16b // yes, even on + rev32 v1.16b,v1.16b // big-endian + rev32 v2.16b,v2.16b + rev32 v3.16b,v3.16b + mov x17,sp + add v4.4s,v4.4s,v0.4s + add v5.4s,v5.4s,v1.4s + add v6.4s,v6.4s,v2.4s + st1 {v4.4s-v5.4s},[x17], #32 + add v7.4s,v7.4s,v3.4s + st1 {v6.4s-v7.4s},[x17] + sub x17,x17,#32 + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#8] + ldp w7,w8,[x0,#16] + ldp w9,w10,[x0,#24] + ldr w12,[sp,#0] + mov w13,wzr + eor w14,w4,w5 + mov w15,wzr + b .L_00_48 + +.align 4 +.L_00_48: + ext v4.16b,v0.16b,v1.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v2.16b,v3.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v3.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + 
ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + 
ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror 
w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne .L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror 
w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret +.size sha256_block_neon,.-sha256_block_neon +#ifndef __KERNEL__ +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c new file mode 100644 index 0000000..a2226f8 --- /dev/null +++ b/arch/arm64/crypto/sha256-glue.c @@ -0,0 +1,185 
@@
+/*
+ * Linux/arm64 port of the OpenSSL SHA256 implementation for AArch64
+ *
+ * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash for arm64");
+MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+                                        unsigned int num_blks);
+
+asmlinkage void sha256_block_neon(u32 *digest, const void *data,
+                                  unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+                         unsigned int len)
+{
+        return sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_data_order);
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+                        unsigned int len, u8 *out)
+{
+        if (len)
+                sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_data_order);
+        sha256_base_do_finalize(desc,
+                                (sha256_block_fn *)sha256_block_data_order);
+
+        return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+        return sha256_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+        .digestsize             = SHA256_DIGEST_SIZE,
+        .init                   = sha256_base_init,
+        .update                 = sha256_update,
+        .final                  = sha256_final,
+        .finup                  = sha256_finup,
+        .descsize               = sizeof(struct sha256_state),
+        .base.cra_name          = "sha256",
+        .base.cra_driver_name   = "sha256-arm64",
+        .base.cra_priority      = 100,
+        .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+        .base.cra_blocksize     = SHA256_BLOCK_SIZE,
+        .base.cra_module        = THIS_MODULE,
+}, {
+        .digestsize             = SHA224_DIGEST_SIZE,
+        .init                   = sha224_base_init,
+        .update                 = sha256_update,
+        .final                  = sha256_final,
+        .finup                  = sha256_finup,
+        .descsize               = sizeof(struct sha256_state),
+        .base.cra_name          = "sha224",
+        .base.cra_driver_name   = "sha224-arm64",
+        .base.cra_priority      = 100,
+        .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+        .base.cra_blocksize     = SHA224_BLOCK_SIZE,
+        .base.cra_module        = THIS_MODULE,
+} };
+
+static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
+                              unsigned int len)
+{
+        /*
+         * Stacking and unstacking a substantial slice of the NEON register
+         * file may significantly affect performance for small updates when
+         * executing in interrupt context, so fall back to the scalar code
+         * in that case.
+         */
+        if (!may_use_simd())
+                return sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_data_order);
+
+        kernel_neon_begin();
+        sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_neon);
+        kernel_neon_end();
+
+        return 0;
+}
+
+static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
+                             unsigned int len, u8 *out)
+{
+        if (!may_use_simd()) {
+                if (len)
+                        sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_data_order);
+                sha256_base_do_finalize(desc,
+                                (sha256_block_fn *)sha256_block_data_order);
+        } else {
+                kernel_neon_begin();
+                if (len)
+                        sha256_base_do_update(desc, data, len,
+                                (sha256_block_fn *)sha256_block_neon);
+                sha256_base_do_finalize(desc,
+                                (sha256_block_fn *)sha256_block_neon);
+                kernel_neon_end();
+        }
+        return sha256_base_finish(desc, out);
+}
+
+static int sha256_final_neon(struct shash_desc *desc, u8 *out)
+{
+        return sha256_finup_neon(desc, NULL, 0, out);
+}
+
+static struct shash_alg neon_algs[] = { {
+        .digestsize             = SHA256_DIGEST_SIZE,
+        .init                   = sha256_base_init,
+        .update                 = sha256_update_neon,
+        .final                  = sha256_final_neon,
+        .finup                  = sha256_finup_neon,
+        .descsize               = sizeof(struct sha256_state),
+        .base.cra_name          = "sha256",
+        .base.cra_driver_name   = "sha256-arm64-neon",
+        .base.cra_priority      = 150,
+        .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+        .base.cra_blocksize     = SHA256_BLOCK_SIZE,
+        .base.cra_module        = THIS_MODULE,
+}, {
+        .digestsize             = SHA224_DIGEST_SIZE,
+        .init                   = sha224_base_init,
+        .update                 = sha256_update_neon,
+        .final                  = sha256_final_neon,
+        .finup                  = sha256_finup_neon,
+        .descsize               = sizeof(struct sha256_state),
+        .base.cra_name          = "sha224",
+        .base.cra_driver_name   = "sha224-arm64-neon",
+        .base.cra_priority      = 150,
+        .base.cra_flags         = CRYPTO_ALG_TYPE_SHASH,
+        .base.cra_blocksize     = SHA224_BLOCK_SIZE,
+        .base.cra_module        = THIS_MODULE,
+} };
+
+static int __init sha256_mod_init(void)
+{
+        int ret = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+        if (ret)
+                return ret;
+
+        if (elf_hwcap & HWCAP_ASIMD) {
+                ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+                if (ret)
+                        crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+        }
+        return ret;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+        if (elf_hwcap & HWCAP_ASIMD)
+                crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
+        crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl
new file mode 100644
index 0000000..c55efb3
--- /dev/null
+++ b/arch/arm64/crypto/sha512-armv8.pl
@@ -0,0 +1,778 @@
+#! /usr/bin/env perl
+# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPLv2 terms is granted.
+# ====================================================================
+#
+# SHA256/512 for ARMv8.
+# +# Performance in cycles per processed byte and improvement coefficient +# over code generated with "default" compiler: +# +# SHA256-hw SHA256(*) SHA512 +# Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +# Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +# Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +# Denver 2.01 10.5 (+26%) 6.70 (+8%) +# X-Gene 20.0 (+100%) 12.8 (+300%(***)) +# Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +# +# (*) Software SHA256 results are of lesser relevance, presented +# mostly for informational purposes. +# (**) The result is a trade-off: it's possible to improve it by +# 10% (or by 1 cycle per round), but at the cost of 20% loss +# on Cortex-A53 (or by 4 cycles per round). +# (***) Super-impressive coefficients over gcc-generated code are +# indication of some compiler "pathology", most notably code +# generated with -mgeneral-regs-only is significanty faster +# and the gap is only 40-90%. +# +# October 2016. +# +# Originally it was reckoned that it makes no sense to implement NEON +# version of SHA256 for 64-bit processors. This is because performance +# improvement on most wide-spread Cortex-A5x processors was observed +# to be marginal, same on Cortex-A53 and ~10% on A57. But then it was +# observed that 32-bit NEON SHA256 performs significantly better than +# 64-bit scalar version on *some* of the more recent processors. As +# result 64-bit NEON version of SHA256 was added to provide best +# all-round performance. For example it executes ~30% faster on X-Gene +# and Mongoose. [For reference, NEON version of SHA512 is bound to +# deliver much less improvement, likely *negative* on Cortex-A5x. +# Which is why NEON support is limited to SHA256.] + +$output=pop; +$flavour=pop; + +if ($flavour && $flavour ne "void") { + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or + die "can't locate arm-xlate.pl"; + + open OUT,"| \"$^X\" $xlate $flavour $output"; + *STDOUT=*OUT; +} else { + open STDOUT,">$output"; +} + +if ($output =~ /512/) { + $BITS=512; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); + @sigma0=(1, 8, 7); + @sigma1=(19,61, 6); + $rounds=80; + $reg_t="x"; +} else { + $BITS=256; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); + @sigma0=( 7,18, 3); + @sigma1=(17,19,10); + $rounds=64; + $reg_t="w"; +} + +$func="sha${BITS}_block_data_order"; + +($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30)); + +@X=map("$reg_t$_",(3..15,0..2)); +@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("$reg_t$_",(20..27)); +($t0,$t1,$t2,$t3)=map("$reg_t$_",(16,17,19,28)); + +sub BODY_00_xx { +my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; +my $j=($i+1)&15; +my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]); + $T0=@X[$i+3] if ($i<11); + +$code.=<<___ if ($i<16); +#ifndef __AARCH64EB__ + rev @X[$i],@X[$i] // $i +#endif +___ +$code.=<<___ if ($i<13 && ($i&1)); + ldp @X[$i+1],@X[$i+2],[$inp],#2*$SZ +___ +$code.=<<___ if ($i==13); + ldp @X[14],@X[15],[$inp] +___ +$code.=<<___ if ($i>=14); + ldr @X[($i-11)&15],[sp,#`$SZ*(($i-11)%4)`] +___ +$code.=<<___ if ($i>0 && $i<16); + add $a,$a,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=11); + str @X[($i-8)&15],[sp,#`$SZ*(($i-8)%4)`] +___ +# While ARMv8 specifies merged rotate-n-logical operation such as +# 'eor x,y,z,ror#n', it was found to negatively affect performance +# on Apple A7. The reason seems to be that it requires even 'y' to +# be available earlier. This means that such merged instruction is +# not necessarily best choice on critical path... 
On the other hand +# Cortex-A5x handles merged instructions much better than disjoint +# rotate and logical... See (**) footnote above. +$code.=<<___ if ($i<15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + eor $T0,$e,$e,ror#`$Sigma1[2]-$Sigma1[1]` + and $t1,$f,$e + bic $t2,$g,$e + add $h,$h,@X[$i&15] // h+=X[i] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$T0,ror#$Sigma1[1] // Sigma1(e) + ror $T0,$a,#$Sigma0[0] + add $h,$h,$t1 // h+=Ch(e,f,g) + eor $t1,$a,$a,ror#`$Sigma0[2]-$Sigma0[1]` + add $h,$h,$t0 // h+=Sigma1(e) + and $t3,$t3,$t2 // (b^c)&=(a^b) + add $d,$d,$h // d+=h + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$t1,ror#$Sigma0[1] // Sigma0(a) + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + //add $h,$h,$t1 // h+=Sigma0(a) +___ +$code.=<<___ if ($i>=15); + ror $t0,$e,#$Sigma1[0] + add $h,$h,$t2 // h+=K[i] + ror $T1,@X[($j+1)&15],#$sigma0[0] + and $t1,$f,$e + ror $T2,@X[($j+14)&15],#$sigma1[0] + bic $t2,$g,$e + ror $T0,$a,#$Sigma0[0] + add $h,$h,@X[$i&15] // h+=X[i] + eor $t0,$t0,$e,ror#$Sigma1[1] + eor $T1,$T1,@X[($j+1)&15],ror#$sigma0[1] + orr $t1,$t1,$t2 // Ch(e,f,g) + eor $t2,$a,$b // a^b, b^c in next round + eor $t0,$t0,$e,ror#$Sigma1[2] // Sigma1(e) + eor $T0,$T0,$a,ror#$Sigma0[1] + add $h,$h,$t1 // h+=Ch(e,f,g) + and $t3,$t3,$t2 // (b^c)&=(a^b) + eor $T2,$T2,@X[($j+14)&15],ror#$sigma1[1] + eor $T1,$T1,@X[($j+1)&15],lsr#$sigma0[2] // sigma0(X[i+1]) + add $h,$h,$t0 // h+=Sigma1(e) + eor $t3,$t3,$b // Maj(a,b,c) + eor $t1,$T0,$a,ror#$Sigma0[2] // Sigma0(a) + eor $T2,$T2,@X[($j+14)&15],lsr#$sigma1[2] // sigma1(X[i+14]) + add @X[$j],@X[$j],@X[($j+9)&15] + add $d,$d,$h // d+=h + add $h,$h,$t3 // h+=Maj(a,b,c) + ldr $t3,[$Ktbl],#$SZ // *K++, $t2 in next round + add @X[$j],@X[$j],$T1 + add $h,$h,$t1 // h+=Sigma0(a) + add @X[$j],@X[$j],$T2 +___ + ($t2,$t3)=($t3,$t2); +} + +$code.=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +#endif + +.text + +.extern OPENSSL_armcap_P +.globl $func +.type $func,%function +.align 6 +$func: +___ +$code.=<<___ if ($SZ==4); +#ifndef __KERNEL__ +# ifdef __ILP32__ + ldrsw x16,.LOPENSSL_armcap_P +# else + ldr x16,.LOPENSSL_armcap_P +# endif + adr x17,.LOPENSSL_armcap_P + add x16,x16,x17 + ldr w16,[x16] + tst w16,#ARMV8_SHA256 + b.ne .Lv8_entry + tst w16,#ARMV7_NEON + b.ne .Lneon_entry +#endif +___ +$code.=<<___; + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*$SZ + + ldp $A,$B,[$ctx] // load context + ldp $C,$D,[$ctx,#2*$SZ] + ldp $E,$F,[$ctx,#4*$SZ] + add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input + ldp $G,$H,[$ctx,#6*$SZ] + adr $Ktbl,.LK$BITS + stp $ctx,$num,[x29,#96] + +.Loop: + ldp @X[0],@X[1],[$inp],#2*$SZ + ldr $t2,[$Ktbl],#$SZ // *K++ + eor $t3,$B,$C // magic seed + str $inp,[x29,#112] +___ +for ($i=0;$i<16;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=".Loop_16_xx:\n"; +for (;$i<32;$i++) { &BODY_00_xx($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + cbnz $t2,.Loop_16_xx + + ldp $ctx,$num,[x29,#96] + ldr $inp,[x29,#112] + sub $Ktbl,$Ktbl,#`$SZ*($rounds+1)` // rewind + + ldp @X[0],@X[1],[$ctx] + ldp @X[2],@X[3],[$ctx,#2*$SZ] + add $inp,$inp,#14*$SZ // advance input pointer + ldp @X[4],@X[5],[$ctx,#4*$SZ] + add $A,$A,@X[0] + ldp @X[6],@X[7],[$ctx,#6*$SZ] + add $B,$B,@X[1] + add $C,$C,@X[2] + add $D,$D,@X[3] + stp $A,$B,[$ctx] + add $E,$E,@X[4] + add $F,$F,@X[5] + stp $C,$D,[$ctx,#2*$SZ] + add $G,$G,@X[6] + add $H,$H,@X[7] + cmp $inp,$num + stp $E,$F,[$ctx,#4*$SZ] + stp $G,$H,[$ctx,#6*$SZ] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*$SZ + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size $func,.-$func + +.align 6 +.type .LK$BITS,%object +.LK$BITS: +___ +$code.=<<___ if ($SZ==8); + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +___ +$code.=<<___ if ($SZ==4); + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +___ +$code.=<<___; +.size .LK$BITS,.-.LK$BITS +#ifndef __KERNEL__ +.align 3 +.LOPENSSL_armcap_P: +# ifdef __ILP32__ + .long OPENSSL_armcap_P-. +# else + .quad OPENSSL_armcap_P-. +# endif +#endif +.asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +___ + +if ($SZ==4) { +my $Ktbl="x3"; + +my ($ABCD,$EFGH,$abcd)=map("v$_.16b",(0..2)); +my @MSG=map("v$_.16b",(4..7)); +my ($W0,$W1)=("v16.4s","v17.4s"); +my ($ABCD_SAVE,$EFGH_SAVE)=("v18.16b","v19.16b"); + +$code.=<<___; +#ifndef __KERNEL__ +.type sha256_block_armv8,%function +.align 6 +sha256_block_armv8: +.Lv8_entry: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + + ld1.32 {$ABCD,$EFGH},[$ctx] + adr $Ktbl,.LK256 + +.Loop_hw: + ld1 {@MSG[0]-@MSG[3]},[$inp],#64 + sub $num,$num,#1 + ld1.32 {$W0},[$Ktbl],#16 + rev32 @MSG[0],@MSG[0] + rev32 @MSG[1],@MSG[1] + rev32 @MSG[2],@MSG[2] + rev32 @MSG[3],@MSG[3] + orr $ABCD_SAVE,$ABCD,$ABCD // offload + orr $EFGH_SAVE,$EFGH,$EFGH +___ +for($i=0;$i<12;$i++) { +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + sha256su0 @MSG[0],@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + sha256su1 @MSG[0],@MSG[2],@MSG[3] +___ + ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG)); +} +$code.=<<___; + ld1.32 {$W1},[$Ktbl],#16 + add.i32 $W0,$W0,@MSG[0] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + ld1.32 {$W0},[$Ktbl],#16 + add.i32 $W1,$W1,@MSG[1] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + ld1.32 {$W1},[$Ktbl] + add.i32 $W0,$W0,@MSG[2] + sub $Ktbl,$Ktbl,#$rounds*$SZ-16 // rewind + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W0 + sha256h2 $EFGH,$abcd,$W0 + + add.i32 $W1,$W1,@MSG[3] + orr $abcd,$ABCD,$ABCD + sha256h $ABCD,$EFGH,$W1 + sha256h2 $EFGH,$abcd,$W1 + + add.i32 $ABCD,$ABCD,$ABCD_SAVE + add.i32 $EFGH,$EFGH,$EFGH_SAVE + + cbnz $num,.Loop_hw + + st1.32 {$ABCD,$EFGH},[$ctx] + + ldr x29,[sp],#16 + ret +.size sha256_block_armv8,.-sha256_block_armv8 +#endif +___ +} + +if ($SZ==4) { ######################################### NEON stuff # +# You'll surely note a lot of similarities with sha256-armv4 module, +# and of course it's not a coincidence. sha256-armv4 was used as +# initial template, but was adapted for ARMv8 instruction set and +# extensively re-tuned for all-round performance. 
+ +my @V = ($A,$B,$C,$D,$E,$F,$G,$H) = map("w$_",(3..10)); +my ($t0,$t1,$t2,$t3,$t4) = map("w$_",(11..15)); +my $Ktbl="x16"; +my $Xfer="x17"; +my @X = map("q$_",(0..3)); +my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7) = map("q$_",(4..7,16..19)); +my $j=0; + +sub AUTOLOAD() # thunk [simplified] x86-style perlasm +{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./; + my $arg = pop; + $arg = "#$arg" if ($arg*1 eq $arg); + $code .= "\t$opcode\t".join(',',@_,$arg)."\n"; +} + +sub Dscalar { shift =~ m|[qv]([0-9]+)|?"d$1":""; } +sub Dlo { shift =~ m|[qv]([0-9]+)|?"v$1.d[0]":""; } +sub Dhi { shift =~ m|[qv]([0-9]+)|?"v$1.d[1]":""; } + +sub Xupdate() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + &ext_8 ($T0,@X[0],@X[1],4); # X[1..4] + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &ext_8 ($T3,@X[2],@X[3],4); # X[9..12] + eval(shift(@insns)); + eval(shift(@insns)); + &mov (&Dscalar($T7),&Dhi(@X[3])); # X[14..15] + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T2,$T0,$sigma0[0]); + eval(shift(@insns)); + &ushr_32 ($T1,$T0,$sigma0[2]); + eval(shift(@insns)); + &add_32 (@X[0],@X[0],$T3); # X[0..3] += X[9..12] + eval(shift(@insns)); + &sli_32 ($T2,$T0,32-$sigma0[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T3,$T0,$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T1,$T1,$T2); + eval(shift(@insns)); + eval(shift(@insns)); + &sli_32 ($T3,$T0,32-$sigma0[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T4,$T7,$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T1,$T1,$T3); # sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &sli_32 ($T4,$T7,32-$sigma1[0]); + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T5,$T7,$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T3,$T7,$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &add_32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4]) + eval(shift(@insns)); + eval(shift(@insns)); + &sli_u32 ($T3,$T7,32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T5,$T5,$T4); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T5,$T5,$T3); # sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &add_32 (@X[0],@X[0],$T5); # X[0..1] += sigma1(X[14..15]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &ushr_32 ($T6,@X[0],$sigma1[0]); + eval(shift(@insns)); + &ushr_32 ($T7,@X[0],$sigma1[2]); + eval(shift(@insns)); + eval(shift(@insns)); + &sli_32 ($T6,@X[0],32-$sigma1[0]); + eval(shift(@insns)); + &ushr_32 ($T5,@X[0],$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T7,$T7,$T6); + eval(shift(@insns)); + eval(shift(@insns)); + &sli_32 ($T5,@X[0],32-$sigma1[1]); + eval(shift(@insns)); + eval(shift(@insns)); + &ld1_32 ("{$T0}","[$Ktbl], #16"); + eval(shift(@insns)); + &eor_8 ($T7,$T7,$T5); # sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + &eor_8 ($T5,$T5,$T5); + eval(shift(@insns)); + eval(shift(@insns)); + &mov (&Dhi($T5), &Dlo($T7)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &add_32 (@X[0],@X[0],$T5); # X[2..3] += sigma1(X[16..17]) + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &add_32 ($T0,$T0,@X[0]); + while($#insns>=1) { eval(shift(@insns)); } + &st1_32 ("{$T0}","[$Xfer], #16"); + eval(shift(@insns)); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub 
Xpreload() +{ use integer; + my $body = shift; + my @insns = (&$body,&$body,&$body,&$body); + my ($a,$b,$c,$d,$e,$f,$g,$h); + + eval(shift(@insns)); + eval(shift(@insns)); + &ld1_8 ("{@X[0]}","[$inp],#16"); + eval(shift(@insns)); + eval(shift(@insns)); + &ld1_32 ("{$T0}","[$Ktbl],#16"); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &rev32 (@X[0],@X[0]); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + eval(shift(@insns)); + &add_32 ($T0,$T0,@X[0]); + foreach (@insns) { eval; } # remaining instructions + &st1_32 ("{$T0}","[$Xfer], #16"); + + push(@X,shift(@X)); # "rotate" X[] +} + +sub body_00_15 () { + ( + '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'. + '&add ($h,$h,$t1)', # h+=X[i]+K[i] + '&add ($a,$a,$t4);'. # h+=Sigma0(a) from the past + '&and ($t1,$f,$e)', + '&bic ($t4,$g,$e)', + '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))', + '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past + '&orr ($t1,$t1,$t4)', # Ch(e,f,g) + '&eor ($t0,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e) + '&eor ($t4,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))', + '&add ($h,$h,$t1)', # h+=Ch(e,f,g) + '&ror ($t0,$t0,"#$Sigma1[0]")', + '&eor ($t2,$a,$b)', # a^b, b^c in next round + '&eor ($t4,$t4,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a) + '&add ($h,$h,$t0)', # h+=Sigma1(e) + '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'. + '&ldr ($t1,"[$Ktbl]") if ($j==15);'. + '&and ($t3,$t3,$t2)', # (b^c)&=(a^b) + '&ror ($t4,$t4,"#$Sigma0[0]")', + '&add ($d,$d,$h)', # d+=h + '&eor ($t3,$t3,$b)', # Maj(a,b,c) + '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);' + ) +} + +$code.=<<___; +#ifdef __KERNEL__ +.globl sha256_block_neon +#endif +.type sha256_block_neon,%function +.align 4 +sha256_block_neon: +.Lneon_entry: + stp x29, x30, [sp, #-16]! 
+ mov x29, sp + sub sp,sp,#16*4 + + adr $Ktbl,.LK256 + add $num,$inp,$num,lsl#6 // len to point at the end of inp + + ld1.8 {@X[0]},[$inp], #16 + ld1.8 {@X[1]},[$inp], #16 + ld1.8 {@X[2]},[$inp], #16 + ld1.8 {@X[3]},[$inp], #16 + ld1.32 {$T0},[$Ktbl], #16 + ld1.32 {$T1},[$Ktbl], #16 + ld1.32 {$T2},[$Ktbl], #16 + ld1.32 {$T3},[$Ktbl], #16 + rev32 @X[0],@X[0] // yes, even on + rev32 @X[1],@X[1] // big-endian + rev32 @X[2],@X[2] + rev32 @X[3],@X[3] + mov $Xfer,sp + add.32 $T0,$T0,@X[0] + add.32 $T1,$T1,@X[1] + add.32 $T2,$T2,@X[2] + st1.32 {$T0-$T1},[$Xfer], #32 + add.32 $T3,$T3,@X[3] + st1.32 {$T2-$T3},[$Xfer] + sub $Xfer,$Xfer,#32 + + ldp $A,$B,[$ctx] + ldp $C,$D,[$ctx,#8] + ldp $E,$F,[$ctx,#16] + ldp $G,$H,[$ctx,#24] + ldr $t1,[sp,#0] + mov $t2,wzr + eor $t3,$B,$C + mov $t4,wzr + b .L_00_48 + +.align 4 +.L_00_48: +___ + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); + &Xupdate(\&body_00_15); +$code.=<<___; + cmp $t1,#0 // check for K256 terminator + ldr $t1,[sp,#0] + sub $Xfer,$Xfer,#64 + bne .L_00_48 + + sub $Ktbl,$Ktbl,#256 // rewind $Ktbl + cmp $inp,$num + mov $Xfer, #64 + csel $Xfer, $Xfer, xzr, eq + sub $inp,$inp,$Xfer // avoid SEGV + mov $Xfer,sp +___ + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); + &Xpreload(\&body_00_15); +$code.=<<___; + add $A,$A,$t4 // h+=Sigma0(a) from the past + ldp $t0,$t1,[$ctx,#0] + add $A,$A,$t2 // h+=Maj(a,b,c) from the past + ldp $t2,$t3,[$ctx,#8] + add $A,$A,$t0 // accumulate + add $B,$B,$t1 + ldp $t0,$t1,[$ctx,#16] + add $C,$C,$t2 + add $D,$D,$t3 + ldp $t2,$t3,[$ctx,#24] + add $E,$E,$t0 + add $F,$F,$t1 + ldr $t1,[sp,#0] + stp $A,$B,[$ctx,#0] + add $G,$G,$t2 + mov $t2,wzr + stp $C,$D,[$ctx,#8] + add $H,$H,$t3 + stp $E,$F,[$ctx,#16] + eor $t3,$B,$C + stp $G,$H,[$ctx,#24] + mov $t4,wzr + mov $Xfer,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret +.size sha256_block_neon,.-sha256_block_neon +___ +} + +$code.=<<___; +#ifndef __KERNEL__ +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +{ my %opcode = ( + "sha256h" => 0x5e004000, "sha256h2" => 0x5e005000, + "sha256su0" => 0x5e282800, "sha256su1" => 0x5e006000 ); + + sub unsha256 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach(split("\n",$code)) { + + s/\`([^\`]*)\`/eval($1)/ge; + + s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge; + + s/\bq([0-9]+)\b/v$1.16b/g; # old->new registers + + s/\.[ui]?8(\s)/$1/; + s/\.\w?32\b// and s/\.16b/\.4s/g; + m/(ld|st)1[^\[]+\[0\]/ and s/\.4s/\.s/g; + + print $_,"\n"; +} + +close STDOUT; diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped new file mode 100644 index 0000000..bd0f59f --- /dev/null +++ b/arch/arm64/crypto/sha512-core.S_shipped @@ -0,0 +1,1085 @@ +// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +// +// Licensed under the OpenSSL license (the "License"). You may not use +// this file except in compliance with the License. You can obtain a copy +// in the file LICENSE in the source distribution or at +// https://www.openssl.org/source/license.html + +// ==================================================================== +// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +// project. 
The module is, however, dual licensed under OpenSSL and +// CRYPTOGAMS licenses depending on where you obtain it. For further +// details see http://www.openssl.org/~appro/cryptogams/. +// +// Permission to use under GPLv2 terms is granted. +// ==================================================================== +// +// SHA256/512 for ARMv8. +// +// Performance in cycles per processed byte and improvement coefficient +// over code generated with "default" compiler: +// +// SHA256-hw SHA256(*) SHA512 +// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) +// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) +// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) +// Denver 2.01 10.5 (+26%) 6.70 (+8%) +// X-Gene 20.0 (+100%) 12.8 (+300%(***)) +// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) +// +// (*) Software SHA256 results are of lesser relevance, presented +// mostly for informational purposes. +// (**) The result is a trade-off: it's possible to improve it by +// 10% (or by 1 cycle per round), but at the cost of 20% loss +// on Cortex-A53 (or by 4 cycles per round). +// (***) Super-impressive coefficients over gcc-generated code are +// indication of some compiler "pathology", most notably code +// generated with -mgeneral-regs-only is significanty faster +// and the gap is only 40-90%. +// +// October 2016. +// +// Originally it was reckoned that it makes no sense to implement NEON +// version of SHA256 for 64-bit processors. This is because performance +// improvement on most wide-spread Cortex-A5x processors was observed +// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was +// observed that 32-bit NEON SHA256 performs significantly better than +// 64-bit scalar version on *some* of the more recent processors. As +// result 64-bit NEON version of SHA256 was added to provide best +// all-round performance. For example it executes ~30% faster on X-Gene +// and Mongoose. [For reference, NEON version of SHA512 is bound to +// deliver much less improvement, likely *negative* on Cortex-A5x. +// Which is why NEON support is limited to SHA256.] + +#ifndef __KERNEL__ +# include "arm_arch.h" +#endif + +.text + +.extern OPENSSL_armcap_P +.globl sha512_block_data_order +.type sha512_block_data_order,%function +.align 6 +sha512_block_data_order: + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,.LK512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add 
x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor 
x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // 
h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor 
x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + 
ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + 
add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr 
x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size sha512_block_data_order,.-sha512_block_data_order + +.align 6 +.type .LK512,%object +.LK512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +.size .LK512,.-.LK512 +#ifndef __KERNEL__ +.align 3 +.LOPENSSL_armcap_P: +# ifdef __ILP32__ + .long OPENSSL_armcap_P-. +# else + .quad OPENSSL_armcap_P-. 
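The unrolled rounds above are the OpenSSL/CRYPTOGAMS SHA-512 block transform; every round performs the same FIPS 180-4 step, and the inline comments (Sigma1(e), Ch(e,f,g), Maj(a,b,c), sigma0(X[i+1]), sigma1(X[i+14])) name the quantities being built up across several instructions. For orientation, here is a plain-C reference of one round; it is only an illustration of the math, not the kernel's code, and every name in it (sha512_round, ROR64, s, w, k) is invented for the sketch.

    #include <stdint.h>

    #define ROR64(x, n)  (((x) >> (n)) | ((x) << (64 - (n))))

    /*
     * One SHA-512 round over state s[0..7] = {a,b,c,d,e,f,g,h} and a
     * 16-word rolling message schedule w[].  Rounds 16..79 also extend
     * the schedule, which is what the sigma0/sigma1 comments refer to.
     */
    static void sha512_round(uint64_t s[8], uint64_t w[16], int i, uint64_t k)
    {
            uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
            uint64_t e = s[4], f = s[5], g = s[6], h = s[7];
            uint64_t S0, S1, ch, maj, t1, t2;

            if (i >= 16) {
                    uint64_t x1  = w[(i + 1) & 15];                 /* X[i+1]  */
                    uint64_t x14 = w[(i + 14) & 15];                /* X[i+14] */
                    uint64_t s0  = ROR64(x1, 1) ^ ROR64(x1, 8) ^ (x1 >> 7);
                    uint64_t s1  = ROR64(x14, 19) ^ ROR64(x14, 61) ^ (x14 >> 6);

                    w[i & 15] += s0 + s1 + w[(i + 9) & 15];
            }

            S1  = ROR64(e, 14) ^ ROR64(e, 18) ^ ROR64(e, 41);       /* Sigma1(e) */
            ch  = (e & f) ^ (~e & g);                               /* Ch(e,f,g) */
            t1  = h + S1 + ch + k + w[i & 15];
            S0  = ROR64(a, 28) ^ ROR64(a, 34) ^ ROR64(a, 39);       /* Sigma0(a) */
            maj = (a & b) ^ (a & c) ^ (b & c);                      /* Maj(a,b,c) */
            t2  = S0 + maj;

            s[7] = g; s[6] = f; s[5] = e;
            s[4] = d + t1;                          /* the assembly's "d+=h" */
            s[3] = c; s[2] = b; s[1] = a;
            s[0] = t1 + t2;                         /* "h+=Sigma0(a)" + "h+=Maj(a,b,c)" */
    }

The assembly gets its speed by fully unrolling this loop and rotating which register plays which role from round to round, rather than shuffling a state array in memory.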
+# endif +#endif +.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#ifndef __KERNEL__ +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c new file mode 100644 index 0000000..aff35c9 --- /dev/null +++ b/arch/arm64/crypto/sha512-glue.c @@ -0,0 +1,94 @@ +/* + * Linux/arm64 port of the OpenSSL SHA512 implementation for AArch64 + * + * Copyright (c) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <crypto/sha512_base.h> +#include <asm/neon.h> + +MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash for arm64"); +MODULE_AUTHOR("Andy Polyakov <appro@openssl.org>"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); + +asmlinkage void sha512_block_data_order(u32 *digest, const void *data, + unsigned int num_blks); + +static int sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); +} + +static int sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (len) + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); + sha512_base_do_finalize(desc, + (sha512_block_fn *)sha512_block_data_order); + + return sha512_base_finish(desc, out); +} + +static int sha512_final(struct shash_desc *desc, u8 *out) +{ + return sha512_finup(desc, NULL, 0, out); +} + +static struct shash_alg algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_update, + .final = sha512_final, + .finup = sha512_finup, + .descsize = sizeof(struct sha512_state), + .base.cra_name = "sha512", + .base.cra_driver_name = "sha512-arm64", + .base.cra_priority = 150, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA512_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_update, + .final = sha512_final, + .finup = sha512_finup, + .descsize = sizeof(struct sha512_state), + .base.cra_name = "sha384", + .base.cra_driver_name = "sha384-arm64", + .base.cra_priority = 150, + .base.cra_flags = CRYPTO_ALG_TYPE_SHASH, + .base.cra_blocksize = SHA384_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +} }; + +static int __init sha512_mod_init(void) +{ + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha512_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_init(sha512_mod_init); +module_exit(sha512_mod_fini); diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 7998c17..87f4045 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o -obj-$(CONFIG_CRYPT_CRC32C_VPMSUM) += crc32c-vpmsum.o 
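The sha512-glue.c file added in the hunk above registers shash implementations under the generic names "sha512" and "sha384" (driver names sha512-arm64/sha384-arm64, priority 150), so existing in-kernel users pick them up transparently through the hash API. A minimal caller-side sketch follows, assuming a 4.10-era kernel (where struct shash_desc still carries a flags field); the function and buffer names are invented for the example.

    #include <crypto/hash.h>
    #include <crypto/sha.h>
    #include <linux/err.h>

    /* Hash a linear buffer with whatever "sha512" driver has the highest
     * priority -- typically sha512-arm64 once this patch is applied.
     */
    static int example_sha512_digest(const u8 *data, unsigned int len,
                                     u8 out[SHA512_DIGEST_SIZE])
    {
            struct crypto_shash *tfm;
            int err;

            tfm = crypto_alloc_shash("sha512", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            {
                    SHASH_DESC_ON_STACK(desc, tfm);

                    desc->tfm = tfm;
                    desc->flags = 0;        /* caller may not sleep */
                    err = crypto_shash_digest(desc, data, len, out);
            }

            crypto_free_shash(tfm);
            return err;
    }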
+obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index aa8b067..31c34ee 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -21,7 +21,6 @@ #include <linux/hardirq.h> #include <linux/types.h> -#include <linux/crypto.h> #include <linux/module.h> #include <linux/err.h> #include <crypto/algapi.h> @@ -29,14 +28,14 @@ #include <crypto/cryptd.h> #include <crypto/ctr.h> #include <crypto/b128ops.h> -#include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> #include <asm/fpu/api.h> #include <asm/crypto/aes.h> -#include <crypto/ablk_helper.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> #include <linux/workqueue.h> #include <linux/spinlock.h> #ifdef CONFIG_X86_64 @@ -45,28 +44,26 @@ #define AESNI_ALIGN 16 +#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN))) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE - 1)) #define RFC4106_HASH_SUBKEY_SIZE 16 +#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1)) +#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA) +#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA) /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. */ struct aesni_rfc4106_gcm_ctx { - u8 hash_subkey[16] __attribute__ ((__aligned__(AESNI_ALIGN))); - struct crypto_aes_ctx aes_key_expanded - __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 hash_subkey[16] AESNI_ALIGN_ATTR; + struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; u8 nonce[4]; }; -struct aesni_lrw_ctx { - struct lrw_table_ctx lrw_table; - u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; -}; - struct aesni_xts_ctx { - u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; - u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; }; asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -360,96 +357,95 @@ static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) aesni_dec(ctx, dst, src); } -static int ecb_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) +{ + return aes_set_key_common(crypto_skcipher_tfm(tfm), + crypto_skcipher_ctx(tfm), key, len); +} + +static int ecb_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); nbytes &= AES_BLOCK_SIZE 
- 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int ecb_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); return err; } -static int cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } kernel_fpu_end(); @@ -458,7 +454,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, #ifdef CONFIG_X86_64 static void ctr_crypt_final(struct crypto_aes_ctx *ctx, - struct blkcipher_walk *walk) + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[AES_BLOCK_SIZE]; @@ -491,157 +487,53 @@ static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, } #endif -static int ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct 
crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, true); kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); nbytes &= AES_BLOCK_SIZE - 1; - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } if (walk.nbytes) { ctr_crypt_final(ctx, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); return err; } -#endif - -static int ablk_ecb_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-ecb-aes-aesni"); -} - -static int ablk_cbc_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-cbc-aes-aesni"); -} - -#ifdef CONFIG_X86_64 -static int ablk_ctr_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); -} - -#endif - -#if IS_ENABLED(CONFIG_CRYPTO_PCBC) -static int ablk_pcbc_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "fpu(pcbc(__driver-aes-aesni))"); -} -#endif - -static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) -{ - aesni_ecb_enc(ctx, blks, blks, nbytes); -} -static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) -{ - aesni_ecb_dec(ctx, blks, blks, nbytes); -} - -static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, +static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { - struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); int err; - err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, - keylen - AES_BLOCK_SIZE); + err = xts_verify_key(tfm, key, keylen); if (err) return err; - return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); -} - -static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) -{ - struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), - .crypt_fn = lrw_xts_encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = lrw_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), - .crypt_fn = lrw_xts_decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = lrw_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; 
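The ecb/cbc/ctr conversions above all follow the same pattern: the blkcipher_desc plus separate src/dst scatterlists become a single struct skcipher_request, and the loop is driven by skcipher_walk_virt()/skcipher_walk_done(). For completeness, this is roughly what a caller of such an skcipher looks like. It is an illustrative sketch (buffer and function names invented) and deliberately requests a synchronous implementation (mask CRYPTO_ALG_ASYNC) so that no completion callback is needed; the AES-NI algorithms registered later in this patch are asynchronous and would require one.

    #include <crypto/skcipher.h>
    #include <linux/scatterlist.h>
    #include <linux/err.h>

    /* Encrypt one block-aligned buffer in place with cbc(aes). */
    static int example_cbc_encrypt(u8 *buf, unsigned int len,
                                   const u8 *key, unsigned int keylen, u8 *iv)
    {
            struct crypto_skcipher *tfm;
            struct scatterlist sg;
            int err;

            tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_skcipher_setkey(tfm, key, keylen);
            if (err)
                    goto out;

            sg_init_one(&sg, buf, len);

            {
                    SKCIPHER_REQUEST_ON_STACK(req, tfm);

                    skcipher_request_set_tfm(req, tfm);
                    skcipher_request_set_callback(req, 0, NULL, NULL);
                    skcipher_request_set_crypt(req, &sg, &sg, len, iv);

                    err = crypto_skcipher_encrypt(req);
                    skcipher_request_zero(req);
            }
    out:
            crypto_free_skcipher(tfm);
            return err;
    }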
-} - -static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; + keylen /= 2; /* first half of xts-key is for crypt */ - err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); + err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, + key, keylen); if (err) return err; /* second half of xts-key is for tweak */ - return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, - keylen / 2); + return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, + key + keylen, keylen); } @@ -650,8 +542,6 @@ static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) aesni_enc(ctx, out, in); } -#ifdef CONFIG_X86_64 - static void aesni_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, GLUE_FUNC_CAST(aesni_enc)); @@ -698,83 +588,28 @@ static const struct common_glue_ctx aesni_dec_xts = { } } }; -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&aesni_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(aesni_xts_tweak), - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); + return glue_xts_req_128bit(&aesni_enc_xts, req, + XTS_TWEAK_CAST(aesni_xts_tweak), + aes_ctx(ctx->raw_tweak_ctx), + aes_ctx(ctx->raw_crypt_ctx)); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&aesni_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(aesni_xts_tweak), - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx)); -} - -#else - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - .tweak_fn = aesni_xts_tweak, - .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), - .crypt_fn = lrw_xts_encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = xts_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[8]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), - .tweak_fn = aesni_xts_tweak, - .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), - .crypt_fn = lrw_xts_decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - - kernel_fpu_begin(); - ret = xts_crypt(desc, dst, src, nbytes, &req); - kernel_fpu_end(); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return ret; + return 
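In the xts_aesni_setkey() rewrite in this hunk, xts_verify_key() first rejects ill-formed keys (odd length, and identical halves in FIPS mode), then keylen /= 2 splits the supplied key: the first half keys the data cipher (raw_crypt_ctx) and the second half keys the tweak cipher (raw_tweak_ctx), which is also why the xts(aes) entry later advertises 2 * AES_MIN_KEY_SIZE / 2 * AES_MAX_KEY_SIZE. A trivial illustration of the split, with invented names:

    /*
     * xts(aes) key = data key || tweak key, so key sizes are doubled:
     * a 32-byte key yields two AES-128 schedules, a 64-byte key two AES-256.
     * Mirrors what xts_aesni_setkey() does after keylen /= 2.
     */
    static void example_xts_key_split(const u8 *xts_key, unsigned int xts_keylen)
    {
            unsigned int keylen = xts_keylen / 2;

            const u8 *data_key  = xts_key;                  /* -> raw_crypt_ctx */
            const u8 *tweak_key = xts_key + keylen;         /* -> raw_tweak_ctx */

            (void)data_key;
            (void)tweak_key;
    }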
glue_xts_req_128bit(&aesni_dec_xts, req, + XTS_TWEAK_CAST(aesni_xts_tweak), + aes_ctx(ctx->raw_tweak_ctx), + aes_ctx(ctx->raw_crypt_ctx)); } -#endif - -#ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_aead *aead) { struct cryptd_aead *cryptd_tfm; @@ -1077,9 +912,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, .cra_module = THIS_MODULE, .cra_u = { .cipher = { @@ -1091,14 +924,12 @@ static struct crypto_alg aesni_algs[] = { { } } }, { - .cra_name = "__aes-aesni", - .cra_driver_name = "__driver-aes-aesni", - .cra_priority = 0, + .cra_name = "__aes", + .cra_driver_name = "__aes-aesni", + .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, .cra_module = THIS_MODULE, .cra_u = { .cipher = { @@ -1109,250 +940,94 @@ static struct crypto_alg aesni_algs[] = { { .cia_decrypt = __aes_decrypt } } -}, { - .cra_name = "__ecb-aes-aesni", - .cra_driver_name = "__driver-ecb-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-aes-aesni", - .cra_driver_name = "__driver-cbc-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_set_key, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_ecb_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, +} }; + +static struct skcipher_alg aesni_skciphers[] = { + { + .base = { + .cra_name = "__ecb(aes)", + .cra_driver_name = "__ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, -}, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_cbc_init, - 
.cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base = { + .cra_name = "__cbc(aes)", + .cra_driver_name = "__cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, #ifdef CONFIG_X86_64 -}, { - .cra_name = "__ctr-aes-aesni", - .cra_driver_name = "__driver-ctr-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct crypto_aes_ctx) + - AESNI_ALIGN - 1, - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = aes_set_key, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, + }, { + .base = { + .cra_name = "__ctr(aes)", + .cra_driver_name = "__ctr-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = CRYPTO_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, -}, { - .cra_name = "ctr(aes)", - .cra_driver_name = "ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .setkey = aesni_skcipher_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base = { + .cra_name = "__xts(aes)", + .cra_driver_name = "__xts-aes-aesni", + .cra_priority = 401, + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = XTS_AES_CTX_SIZE, + .cra_module = THIS_MODULE, }, - }, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, #endif + } +}; + +struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; + +struct { + const char *algname; + const char *drvname; + const char *basename; + struct simd_skcipher_alg *simd; +} aesni_simd_skciphers2[] = { #if IS_ENABLED(CONFIG_CRYPTO_PCBC) -}, { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_pcbc_init, - .cra_exit = 
ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, + { + .algname = "pcbc(aes)", + .drvname = "pcbc-aes-aesni", + .basename = "fpu(pcbc(__aes-aesni))", }, #endif -}, { - .cra_name = "__lrw-aes-aesni", - .cra_driver_name = "__driver-lrw-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesni_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_aesni_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = lrw_aesni_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-aes-aesni", - .cra_driver_name = "__driver-xts-aes-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aesni_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = xts_aesni_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(aes)", - .cra_driver_name = "xts-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; +}; #ifdef CONFIG_X86_64 static struct aead_alg aesni_aead_algs[] = { { @@ -1401,9 +1076,27 @@ static const struct x86_cpu_id aesni_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); +static void aesni_free_simds(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && + aesni_simd_skciphers[i]; i++) + simd_skcipher_free(aesni_simd_skciphers[i]); + + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) && + aesni_simd_skciphers2[i].simd; i++) + simd_skcipher_free(aesni_simd_skciphers2[i].simd); +} + static int __init aesni_init(void) { + struct simd_skcipher_alg *simd; + const char *basename; + const char *algname; + const char *drvname; int err; + int i; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1445,13 +1138,48 @@ static 
int __init aesni_init(void) if (err) goto fpu_exit; + err = crypto_register_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); + if (err) + goto unregister_algs; + err = crypto_register_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); if (err) - goto unregister_algs; + goto unregister_skciphers; + + for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) { + algname = aesni_skciphers[i].base.cra_name + 2; + drvname = aesni_skciphers[i].base.cra_driver_name + 2; + basename = aesni_skciphers[i].base.cra_driver_name; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + + aesni_simd_skciphers[i] = simd; + } - return err; + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) { + algname = aesni_simd_skciphers2[i].algname; + drvname = aesni_simd_skciphers2[i].drvname; + basename = aesni_simd_skciphers2[i].basename; + simd = simd_skcipher_create_compat(algname, drvname, basename); + err = PTR_ERR(simd); + if (IS_ERR(simd)) + goto unregister_simds; + aesni_simd_skciphers2[i].simd = simd; + } + + return 0; + +unregister_simds: + aesni_free_simds(); + crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); +unregister_skciphers: + crypto_unregister_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); fpu_exit: @@ -1461,7 +1189,10 @@ fpu_exit: static void __exit aesni_exit(void) { + aesni_free_simds(); crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); + crypto_unregister_skciphers(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers)); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); crypto_fpu_exit(); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index e7d679e..4066804 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -11,143 +11,186 @@ * */ -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> -#include <linux/crypto.h> #include <asm/fpu/api.h> struct crypto_fpu_ctx { - struct crypto_blkcipher *child; + struct crypto_skcipher *child; }; -static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); - struct crypto_blkcipher *child = ctx->child; + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err; - crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_blkcipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_fpu_encrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_fpu_ctx *ctx = 
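The aesni_init() changes above register the skcipher table with CRYPTO_ALG_INTERNAL and "__"-prefixed names, so the raw FPU-only implementations stay invisible to normal users, and then create the user-facing algorithms with simd_skcipher_create_compat(); the wrapper falls back to cryptd whenever SIMD cannot be used in the calling context. The "cra_name + 2" arithmetic simply strips the "__" prefix. A sketch of what that amounts to for the first table entry (error handling omitted; the helper is declared in <crypto/internal/simd.h>, added elsewhere in this series):

    #include <crypto/internal/simd.h>

    static struct simd_skcipher_alg *example_wrap_ecb_aesni(void)
    {
            const char *name   = "__ecb(aes)";      /* internal cra_name        */
            const char *driver = "__ecb-aes-aesni"; /* internal cra_driver_name */

            /* User-visible async alg "ecb(aes)" / "ecb-aes-aesni",
             * wrapping the internal "__ecb-aes-aesni" implementation.
             */
            return simd_skcipher_create_compat(name + 2, driver + 2, driver);
    }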
crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + SKCIPHER_REQUEST_ON_STACK(subreq, child); int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; + + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, 0, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); + err = crypto_skcipher_encrypt(subreq); kernel_fpu_end(); + + skcipher_request_zero(subreq); return err; } -static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_fpu_decrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + SKCIPHER_REQUEST_ON_STACK(subreq, child); int err; - struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); - struct crypto_blkcipher *child = ctx->child; - struct blkcipher_desc desc = { - .tfm = child, - .info = desc_in->info, - .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, - }; + + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, 0, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); kernel_fpu_begin(); - err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); + err = crypto_skcipher_decrypt(subreq); kernel_fpu_end(); + + skcipher_request_zero(subreq); return err; } -static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) +static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_blkcipher *cipher; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher_spawn *spawn; + struct crypto_skcipher *cipher; - cipher = crypto_spawn_blkcipher(spawn); + spawn = skcipher_instance_ctx(inst); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; + return 0; } -static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) +static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm) +{ + struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); +} + +static void crypto_fpu_free(struct skcipher_instance *inst) { - struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_blkcipher(ctx->child); + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); } -static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) +static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ (CRYPTO_ALG_INTERNAL | 
CRYPTO_ALG_TYPE_SKCIPHER)) & + algt->mask) + return -EINVAL; + + if (!(algt->mask & CRYPTO_ALG_INTERNAL)) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); if (err) - return ERR_PTR(err); - - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); - - inst = crypto_alloc_instance("fpu", alg); - if (IS_ERR(inst)) - goto out_put_alg; - - inst->alg.cra_flags = alg->cra_flags; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = alg->cra_type; - inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); - inst->alg.cra_init = crypto_fpu_init_tfm; - inst->alg.cra_exit = crypto_fpu_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; - -out_put_alg: - crypto_mod_put(alg); - return inst; -} + goto out_free_inst; -static void crypto_fpu_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + alg = crypto_skcipher_spawn_alg(spawn); + + err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu", + &alg->base); + if (err) + goto out_drop_skcipher; + + inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; + + inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); + + inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx); + + inst->alg.init = crypto_fpu_init_tfm; + inst->alg.exit = crypto_fpu_exit_tfm; + + inst->alg.setkey = crypto_fpu_setkey; + inst->alg.encrypt = crypto_fpu_encrypt; + inst->alg.decrypt = crypto_fpu_decrypt; + + inst->free = crypto_fpu_free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto out_drop_skcipher; + +out: + return err; + +out_drop_skcipher: + crypto_drop_skcipher(spawn); +out_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_fpu_tmpl = { .name = "fpu", - .alloc = crypto_fpu_alloc, - .free = crypto_fpu_free, + .create = crypto_fpu_create, .module = THIS_MODULE, }; diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 6a85598..260a060 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -27,10 +27,10 @@ #include <linux/module.h> #include <crypto/b128ops.h> +#include <crypto/internal/skcipher.h> #include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/crypto/glue_helper.h> -#include <crypto/scatterwalk.h> static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, @@ -339,6 +339,41 @@ done: return nbytes; } +static unsigned int 
__glue_xts_req_128bit(const struct common_glue_ctx *gctx, + void *ctx, + struct skcipher_walk *walk) +{ + const unsigned int bsize = 128 / 8; + unsigned int nbytes = walk->nbytes; + u128 *src = walk->src.virt.addr; + u128 *dst = walk->dst.virt.addr; + unsigned int num_blocks, func_bytes; + unsigned int i; + + /* Process multi-block batch */ + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes >= func_bytes) { + do { + gctx->funcs[i].fn_u.xts(ctx, dst, src, + walk->iv); + + src += num_blocks; + dst += num_blocks; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); + + if (nbytes < bsize) + goto done; + } + } + +done: + return nbytes; +} + /* for implementations implementing faster XTS IV generator */ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, struct blkcipher_desc *desc, struct scatterlist *dst, @@ -379,6 +414,43 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); +int glue_xts_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx) +{ + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; + + err = skcipher_walk_virt(&walk, req, false); + nbytes = walk.nbytes; + if (!nbytes) + return err; + + /* set minimum length to bsize, for tweak_fn */ + fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, + nbytes < bsize ? bsize : nbytes); + + /* calculate first value of T */ + tweak_fn(tweak_ctx, walk.iv, walk.iv); + + while (nbytes) { + nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk); + + err = skcipher_walk_done(&walk, nbytes); + nbytes = walk.nbytes; + } + + glue_fpu_end(fpu_enabled); + + return err; +} +EXPORT_SYMBOL_GPL(glue_xts_req_128bit); + void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn) { diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c index 9e5b671..acf9fdf 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -114,7 +114,7 @@ static inline void sha1_init_digest(uint32_t *digest) } static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], - uint32_t total_len) + uint64_t total_len) { uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index 98a35bc..13590cc 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -125,7 +125,7 @@ struct sha1_hash_ctx { /* error flag */ int error; - uint32_t total_length; + uint64_t total_length; const void *incoming_buffer; uint32_t incoming_buffer_length; uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 6f97fb3..7926a22 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -115,7 +115,7 @@ inline void sha256_init_digest(uint32_t *digest) } inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], - uint32_t total_len) + uint64_t total_len) { uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h index edd252b..aabb303 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ 
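The glue_xts_req_128bit() helper added above only drives the skcipher walk, sizes the FPU region and computes the first tweak via tweak_fn (AES-encrypting the IV with the tweak key); the per-block XTS step itself is done by the fn_u.xts callbacks such as aesni_xts_enc(). For reference, that step is C_j = E_Kdata(P_j xor T_j) xor T_j, followed by T_{j+1} = T_j * x in GF(2^128). Below is a standalone, illustrative sketch of one block; it does not use the kernel's le128/gf128mul helpers, all names are invented, and userspace types are used for brevity.

    #include <stdint.h>

    /* Stand-in for a single-block AES encryption with an expanded key. */
    typedef void (*aes_enc_t)(const void *key, uint8_t out[16], const uint8_t in[16]);

    /* Multiply the tweak by x in GF(2^128), XTS byte order (little-endian). */
    static void gf128_mul_x(uint8_t t[16])
    {
            uint8_t carry = 0;
            int i;

            for (i = 0; i < 16; i++) {
                    uint8_t next = t[i] >> 7;

                    t[i] = (uint8_t)((t[i] << 1) | carry);
                    carry = next;
            }
            if (carry)
                    t[0] ^= 0x87;   /* reduction: x^128 + x^7 + x^2 + x + 1 */
    }

    /* One XTS block: C = E_Kdata(P xor T) xor T, then advance the tweak T. */
    static void xts_block(aes_enc_t aes_enc, const void *data_key,
                          uint8_t c[16], const uint8_t p[16], uint8_t t[16])
    {
            uint8_t buf[16];
            int i;

            for (i = 0; i < 16; i++)
                    buf[i] = p[i] ^ t[i];           /* PP = P xor T  */
            aes_enc(data_key, c, buf);              /* CC = E(PP)    */
            for (i = 0; i < 16; i++)
                    c[i] ^= t[i];                   /* C  = CC xor T */

            gf128_mul_x(t);                         /* next tweak    */
    }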
b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -125,7 +125,7 @@ struct sha256_hash_ctx { /* error flag */ int error; - uint32_t total_length; + uint64_t total_length; const void *incoming_buffer; uint32_t incoming_buffer_length; uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index d210174..9c1bb6d 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -117,7 +117,7 @@ inline void sha512_init_digest(uint64_t *digest) } inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], - uint32_t total_len) + uint64_t total_len) { uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h index 9d4b2c8..e4653f5 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -119,7 +119,7 @@ struct sha512_hash_ctx { /* error flag */ int error; - uint32_t total_length; + uint64_t total_length; const void *incoming_buffer; uint32_t incoming_buffer_length; uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 03bb106..29e53ea 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -5,8 +5,8 @@ #ifndef _CRYPTO_GLUE_HELPER_H #define _CRYPTO_GLUE_HELPER_H +#include <crypto/internal/skcipher.h> #include <linux/kernel.h> -#include <linux/crypto.h> #include <asm/fpu/api.h> #include <crypto/b128ops.h> @@ -69,6 +69,31 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, return true; } +static inline bool glue_skwalk_fpu_begin(unsigned int bsize, + int fpu_blocks_limit, + struct skcipher_walk *walk, + bool fpu_enabled, unsigned int nbytes) +{ + if (likely(fpu_blocks_limit < 0)) + return false; + + if (fpu_enabled) + return true; + + /* + * Vector-registers are only used when chunk to be processed is large + * enough, so do not enable FPU until it is necessary. 
+ */ + if (nbytes < bsize * (unsigned int)fpu_blocks_limit) + return false; + + /* prevent sleeping if FPU is in use */ + skcipher_walk_atomise(walk); + + kernel_fpu_begin(); + return true; +} + static inline void glue_fpu_end(bool fpu_enabled) { if (fpu_enabled) @@ -139,6 +164,18 @@ extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, common_glue_func_t tweak_fn, void *tweak_ctx, void *crypt_ctx); +extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, + struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx); + +extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req, + common_glue_func_t tweak_fn, void *tweak_ctx, + void *crypt_ctx); + extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src, le128 *iv, common_glue_func_t fn); diff --git a/crypto/842.c b/crypto/842.c index 98e387e..bc26dc9 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -31,11 +31,46 @@ #include <linux/module.h> #include <linux/crypto.h> #include <linux/sw842.h> +#include <crypto/internal/scompress.h> struct crypto842_ctx { - char wmem[SW842_MEM_COMPRESS]; /* working memory for compress */ + void *wmem; /* working memory for compress */ }; +static void *crypto842_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = kmalloc(SW842_MEM_COMPRESS, GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + +static int crypto842_init(struct crypto_tfm *tfm) +{ + struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->wmem = crypto842_alloc_ctx(NULL); + if (IS_ERR(ctx->wmem)) + return -ENOMEM; + + return 0; +} + +static void crypto842_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + kfree(ctx); +} + +static void crypto842_exit(struct crypto_tfm *tfm) +{ + struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto842_free_ctx(NULL, ctx->wmem); +} + static int crypto842_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) @@ -45,6 +80,13 @@ static int crypto842_compress(struct crypto_tfm *tfm, return sw842_compress(src, slen, dst, dlen, ctx->wmem); } +static int crypto842_scompress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + return sw842_compress(src, slen, dst, dlen, ctx); +} + static int crypto842_decompress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) @@ -52,6 +94,13 @@ static int crypto842_decompress(struct crypto_tfm *tfm, return sw842_decompress(src, slen, dst, dlen); } +static int crypto842_sdecompress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + return sw842_decompress(src, slen, dst, dlen); +} + static struct crypto_alg alg = { .cra_name = "842", .cra_driver_name = "842-generic", @@ -59,20 +108,48 @@ static struct crypto_alg alg = { .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, .cra_ctxsize = sizeof(struct crypto842_ctx), .cra_module = THIS_MODULE, + .cra_init = crypto842_init, + .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, .coa_decompress = crypto842_decompress } } }; +static struct scomp_alg scomp = { + .alloc_ctx = crypto842_alloc_ctx, + .free_ctx = crypto842_free_ctx, + .compress = crypto842_scompress, + .decompress = crypto842_sdecompress, + .base = { + .cra_name = "842", + .cra_driver_name = "842-scomp", + .cra_priority = 100, + 
.cra_module = THIS_MODULE, + } +}; + static int __init crypto842_mod_init(void) { - return crypto_register_alg(&alg); + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg); + return ret; + } + + return ret; } module_init(crypto842_mod_init); static void __exit crypto842_mod_exit(void) { crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); } module_exit(crypto842_mod_exit); diff --git a/crypto/Kconfig b/crypto/Kconfig index 84d7148..160f08e 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -24,7 +24,7 @@ comment "Crypto core or helper" config CRYPTO_FIPS bool "FIPS 200 compliance" depends on (CRYPTO_ANSI_CPRNG || CRYPTO_DRBG) && !CRYPTO_MANAGER_DISABLE_TESTS - depends on MODULE_SIG + depends on (MODULE_SIG || !MODULES) help This options enables the fips boot option which is required if you want to system to operate in a FIPS 200 @@ -102,6 +102,15 @@ config CRYPTO_KPP select CRYPTO_ALGAPI select CRYPTO_KPP2 +config CRYPTO_ACOMP2 + tristate + select CRYPTO_ALGAPI2 + +config CRYPTO_ACOMP + tristate + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + config CRYPTO_RSA tristate "RSA algorithm" select CRYPTO_AKCIPHER @@ -138,6 +147,7 @@ config CRYPTO_MANAGER2 select CRYPTO_BLKCIPHER2 select CRYPTO_AKCIPHER2 select CRYPTO_KPP2 + select CRYPTO_ACOMP2 config CRYPTO_USER tristate "Userspace cryptographic algorithm configuration" @@ -236,10 +246,14 @@ config CRYPTO_ABLK_HELPER tristate select CRYPTO_CRYPTD +config CRYPTO_SIMD + tristate + select CRYPTO_CRYPTD + config CRYPTO_GLUE_HELPER_X86 tristate depends on X86 - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER config CRYPTO_ENGINE tristate @@ -437,7 +451,7 @@ config CRYPTO_CRC32C_INTEL gain performance compared with software implementation. Module will be crc32c-intel. -config CRYPT_CRC32C_VPMSUM +config CRYPTO_CRC32C_VPMSUM tristate "CRC32c CRC algorithm (powerpc64)" depends on PPC64 && ALTIVEC select CRYPTO_HASH @@ -928,14 +942,13 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_NI_INTEL tristate "AES cipher algorithms (AES-NI)" depends on X86 + select CRYPTO_AEAD select CRYPTO_AES_X86_64 if 64BIT select CRYPTO_AES_586 if !64BIT - select CRYPTO_CRYPTD - select CRYPTO_ABLK_HELPER select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_GLUE_HELPER_X86 if 64BIT - select CRYPTO_LRW - select CRYPTO_XTS + select CRYPTO_SIMD help Use Intel AES-NI instructions for AES algorithm. 
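For reference, a minimal sketch of how a kernel caller might drive the new acomp interface whose Kconfig entries appear above (CRYPTO_ACOMP/CRYPTO_ACOMP2). It uses only functions introduced by this series - crypto_alloc_acomp(), acomp_request_alloc(), acomp_request_set_params(), acomp_request_set_callback() and crypto_acomp_compress(); the "deflate" name matches the deflate-scomp registration later in this diff, while acomp_demo() and its completion plumbing are illustrative only and not part of the patch:

#include <crypto/acompress.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

struct acomp_demo_result {
    struct completion done;
    int err;
};

/* completion callback, used only when the transform runs asynchronously */
static void acomp_demo_cb(struct crypto_async_request *areq, int err)
{
    struct acomp_demo_result *res = areq->data;

    res->err = err;
    complete(&res->done);
}

/* compress one linear buffer; a real caller would read req->dlen
 * (the produced length) before freeing the request */
static int acomp_demo(void *src, unsigned int slen, void *dst, unsigned int dlen)
{
    struct scatterlist sg_src, sg_dst;
    struct acomp_demo_result res;
    struct crypto_acomp *tfm;
    struct acomp_req *req;
    int err;

    tfm = crypto_alloc_acomp("deflate", 0, 0);
    if (IS_ERR(tfm))
        return PTR_ERR(tfm);

    req = acomp_request_alloc(tfm);
    if (!req) {
        err = -ENOMEM;
        goto out_free_tfm;
    }

    sg_init_one(&sg_src, src, slen);
    sg_init_one(&sg_dst, dst, dlen);
    acomp_request_set_params(req, &sg_src, &sg_dst, slen, dlen);

    init_completion(&res.done);
    acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                               acomp_demo_cb, &res);

    err = crypto_acomp_compress(req);
    if (err == -EINPROGRESS || err == -EBUSY) {
        wait_for_completion(&res.done);
        err = res.err;
    }

    acomp_request_free(req);
out_free_tfm:
    crypto_free_acomp(tfm);
    return err;
}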
@@ -1568,6 +1581,7 @@ comment "Compression" config CRYPTO_DEFLATE tristate "Deflate compression algorithm" select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 select ZLIB_INFLATE select ZLIB_DEFLATE help @@ -1579,6 +1593,7 @@ config CRYPTO_DEFLATE config CRYPTO_LZO tristate "LZO compression algorithm" select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 select LZO_COMPRESS select LZO_DECOMPRESS help @@ -1587,6 +1602,7 @@ config CRYPTO_LZO config CRYPTO_842 tristate "842 compression algorithm" select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 select 842_COMPRESS select 842_DECOMPRESS help @@ -1595,6 +1611,7 @@ config CRYPTO_842 config CRYPTO_LZ4 tristate "LZ4 compression algorithm" select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 select LZ4_COMPRESS select LZ4_DECOMPRESS help @@ -1603,6 +1620,7 @@ config CRYPTO_LZ4 config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 select LZ4HC_COMPRESS select LZ4_DECOMPRESS help diff --git a/crypto/Makefile b/crypto/Makefile index bd6a029..b8f0e3e 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -51,6 +51,10 @@ rsa_generic-y += rsa_helper.o rsa_generic-y += rsa-pkcs1pad.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o +crypto_acompress-y := acompress.o +crypto_acompress-y += scompress.o +obj-$(CONFIG_CRYPTO_ACOMP2) += crypto_acompress.o + cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o @@ -139,3 +143,5 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx/ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += asymmetric_keys/ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_ABLK_HELPER) += ablk_helper.o +crypto_simd-y := simd.o +obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o diff --git a/crypto/acompress.c b/crypto/acompress.c new file mode 100644 index 0000000..887783d --- /dev/null +++ b/crypto/acompress.c @@ -0,0 +1,169 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li <weigang.li@intel.com> + * Giovanni Cabiddu <giovanni.cabiddu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <linux/cryptouser.h> +#include <net/netlink.h> +#include <crypto/internal/acompress.h> +#include <crypto/internal/scompress.h> +#include "internal.h" + +static const struct crypto_type crypto_acomp_type; + +#ifdef CONFIG_NET +static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_acomp racomp; + + strncpy(racomp.type, "acomp", sizeof(racomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, + sizeof(struct crypto_report_acomp), &racomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_acomp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : acomp\n"); +} + +static void crypto_acomp_exit_tfm(struct crypto_tfm *tfm) +{ + struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); + struct acomp_alg *alg = crypto_acomp_alg(acomp); + + alg->exit(acomp); +} + +static int crypto_acomp_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_acomp *acomp = __crypto_acomp_tfm(tfm); + struct acomp_alg *alg = crypto_acomp_alg(acomp); + + if (tfm->__crt_alg->cra_type != &crypto_acomp_type) + return crypto_init_scomp_ops_async(tfm); + + acomp->compress = alg->compress; + acomp->decompress = alg->decompress; + acomp->dst_free = alg->dst_free; + acomp->reqsize = alg->reqsize; + + if (alg->exit) + acomp->base.exit = crypto_acomp_exit_tfm; + + if (alg->init) + return alg->init(acomp); + + return 0; +} + +static unsigned int crypto_acomp_extsize(struct crypto_alg *alg) +{ + int extsize = crypto_alg_extsize(alg); + + if (alg->cra_type != &crypto_acomp_type) + extsize += sizeof(struct crypto_scomp *); + + return extsize; +} + +static const struct crypto_type crypto_acomp_type = { + .extsize = crypto_acomp_extsize, + .init_tfm = crypto_acomp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_acomp_show, +#endif + .report = crypto_acomp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_ACOMPRESS_MASK, + .type = CRYPTO_ALG_TYPE_ACOMPRESS, + .tfmsize = offsetof(struct crypto_acomp, base), +}; + +struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, + u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_acomp_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_acomp); + +struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct acomp_req *req; + + req = __acomp_request_alloc(acomp); + if (req && (tfm->__crt_alg->cra_type != &crypto_acomp_type)) + return crypto_acomp_scomp_alloc_ctx(req); + + return req; +} +EXPORT_SYMBOL_GPL(acomp_request_alloc); + +void acomp_request_free(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + + if (tfm->__crt_alg->cra_type != &crypto_acomp_type) + crypto_acomp_scomp_free_ctx(req); + + if (req->flags & CRYPTO_ACOMP_ALLOC_OUTPUT) { + acomp->dst_free(req->dst); + req->dst = NULL; + } + + __acomp_request_free(req); +} +EXPORT_SYMBOL_GPL(acomp_request_free); + +int crypto_register_acomp(struct acomp_alg *alg) +{ + struct 
crypto_alg *base = &alg->base; + + base->cra_type = &crypto_acomp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_ACOMPRESS; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_acomp); + +int crypto_unregister_acomp(struct acomp_alg *alg) +{ + return crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_acomp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Asynchronous compression type"); diff --git a/crypto/algboss.c b/crypto/algboss.c index 6e39d9c..ccb85e1 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -247,12 +247,8 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) memcpy(param->alg, alg->cra_name, sizeof(param->alg)); type = alg->cra_flags; - /* This piece of crap needs to disappear into per-type test hooks. */ - if (!((type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & - CRYPTO_ALG_TYPE_BLKCIPHER_MASK) && !(type & CRYPTO_ALG_GENIV) && - ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == - CRYPTO_ALG_TYPE_BLKCIPHER ? alg->cra_blkcipher.ivsize : - alg->cra_ablkcipher.ivsize)) + /* Do not test internal algorithms. */ + if (type & CRYPTO_ALG_INTERNAL) type |= CRYPTO_ALG_TESTED; param->type = type; diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 235f54d..668ef40 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -454,12 +454,13 @@ static int aead_recvmsg_async(struct socket *sock, struct msghdr *msg, used -= ctx->aead_assoclen; /* take over all tx sgls from ctx */ - areq->tsgl = sock_kmalloc(sk, sizeof(*areq->tsgl) * sgl->cur, + areq->tsgl = sock_kmalloc(sk, + sizeof(*areq->tsgl) * max_t(u32, sgl->cur, 1), GFP_KERNEL); if (unlikely(!areq->tsgl)) goto free; - sg_init_table(areq->tsgl, sgl->cur); + sg_init_table(areq->tsgl, max_t(u32, sgl->cur, 1)); for (i = 0; i < sgl->cur; i++) sg_set_page(&areq->tsgl[i], sg_page(&sgl->sg[i]), sgl->sg[i].length, sgl->sg[i].offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 1e38aaa..a9e79d8 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -566,8 +566,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, * need to expand */ tmp = kcalloc(tx_nents * 2, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto free; + } sg_init_table(tmp, tx_nents * 2); for (x = 0; x < tx_nents; x++) diff --git a/crypto/api.c b/crypto/api.c index bbc147c..b16ce16 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -211,8 +211,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) if (!name) return ERR_PTR(-ENOENT); + type &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); - type &= mask; alg = crypto_alg_lookup(name, type, mask); if (!alg) { @@ -310,24 +310,8 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) { const struct crypto_type *type = tfm->__crt_alg->cra_type; - if (type) { - if (tfm->exit) - tfm->exit(tfm); - return; - } - - switch (crypto_tfm_alg_type(tfm)) { - case CRYPTO_ALG_TYPE_CIPHER: - crypto_exit_cipher_ops(tfm); - break; - - case CRYPTO_ALG_TYPE_COMPRESS: - crypto_exit_compress_ops(tfm); - break; - - default: - BUG(); - } + if (type && tfm->exit) + tfm->exit(tfm); } static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/authenc.c b/crypto/authenc.c index a7e1ac7..875470b 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -324,7 +324,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm) if (IS_ERR(auth)) return PTR_ERR(auth); - enc = 
crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; @@ -420,9 +420,9 @@ static int crypto_authenc_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 121010a..6f8f6b8 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -342,7 +342,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm) if (IS_ERR(auth)) return PTR_ERR(auth); - enc = crypto_spawn_skcipher2(&ictx->enc); + enc = crypto_spawn_skcipher(&ictx->enc); err = PTR_ERR(enc); if (IS_ERR(enc)) goto err_free_ahash; @@ -441,9 +441,9 @@ static int crypto_authenc_esn_create(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->enc, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->enc, enc_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->enc, enc_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_auth; diff --git a/crypto/cbc.c b/crypto/cbc.c index 780ee27..68f751a 100644 --- a/crypto/cbc.c +++ b/crypto/cbc.c @@ -1,7 +1,7 @@ /* * CBC: Cipher Block Chaining mode * - * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> + * Copyright (c) 2006-2016 Herbert Xu <herbert@gondor.apana.org.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -10,191 +10,78 @@ * */ -#include <crypto/algapi.h> +#include <crypto/cbc.h> +#include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/module.h> -#include <linux/scatterlist.h> #include <linux/slab.h> struct crypto_cbc_ctx { struct crypto_cipher *child; }; -static int crypto_cbc_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_cbc_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_cipher *child = ctx->child; int err; crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_cbc_encrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) +static inline void crypto_cbc_encrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; - - do { - crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), dst, iv); - memcpy(iv, dst, bsize); - - 
src += bsize; - dst += bsize; - } while ((nbytes -= bsize) >= bsize); - - return nbytes; -} - -static int crypto_cbc_encrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) -{ - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *iv = walk->iv; - - do { - crypto_xor(src, iv, bsize); - fn(crypto_cipher_tfm(tfm), src, src); - iv = src; - - src += bsize; - } while ((nbytes -= bsize) >= bsize); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - memcpy(walk->iv, iv, bsize); - - return nbytes; + crypto_cipher_encrypt_one(ctx->child, dst, src); } -static int crypto_cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct crypto_cipher *child = ctx->child; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_cbc_encrypt_inplace(desc, &walk, child); - else - nbytes = crypto_cbc_encrypt_segment(desc, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); - } - - return err; + return crypto_cbc_encrypt_walk(req, crypto_cbc_encrypt_one); } -static int crypto_cbc_decrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) +static inline void crypto_cbc_decrypt_one(struct crypto_skcipher *tfm, + const u8 *src, u8 *dst) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - u8 *iv = walk->iv; - - do { - fn(crypto_cipher_tfm(tfm), dst, src); - crypto_xor(dst, iv, bsize); - iv = src; - - src += bsize; - dst += bsize; - } while ((nbytes -= bsize) >= bsize); - - memcpy(walk->iv, iv, bsize); - - return nbytes; -} + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); -static int crypto_cbc_decrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, - struct crypto_cipher *tfm) -{ - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; - int bsize = crypto_cipher_blocksize(tfm); - unsigned int nbytes = walk->nbytes; - u8 *src = walk->src.virt.addr; - u8 last_iv[bsize]; - - /* Start of the last block. 
*/ - src += nbytes - (nbytes & (bsize - 1)) - bsize; - memcpy(last_iv, src, bsize); - - for (;;) { - fn(crypto_cipher_tfm(tfm), src, src); - if ((nbytes -= bsize) < bsize) - break; - crypto_xor(src, src - bsize, bsize); - src -= bsize; - } - - crypto_xor(src, walk->iv, bsize); - memcpy(walk->iv, last_iv, bsize); - - return nbytes; + crypto_cipher_decrypt_one(ctx->child, dst, src); } -static int crypto_cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_cbc_ctx *ctx = crypto_blkcipher_ctx(tfm); - struct crypto_cipher *child = ctx->child; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_walk walk; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes)) { - if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_cbc_decrypt_inplace(desc, &walk, child); - else - nbytes = crypto_cbc_decrypt_segment(desc, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); + while (walk.nbytes) { + err = crypto_cbc_decrypt_blocks(&walk, tfm, + crypto_cbc_decrypt_one); + err = skcipher_walk_done(&walk, err); } return err; } -static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) +static int crypto_cbc_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); @@ -205,72 +92,94 @@ static int crypto_cbc_init_tfm(struct crypto_tfm *tfm) return 0; } -static void crypto_cbc_exit_tfm(struct crypto_tfm *tfm) +static void crypto_cbc_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + crypto_free_cipher(ctx->child); } -static struct crypto_instance *crypto_cbc_alloc(struct rtattr **tb) +static void crypto_cbc_free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; + struct skcipher_instance *inst; + struct crypto_spawn *spawn; struct crypto_alg *alg; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER); if (err) - return ERR_PTR(err); + return err; + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); + err = PTR_ERR(alg); if (IS_ERR(alg)) - return ERR_CAST(alg); + goto err_free_inst; - inst = ERR_PTR(-EINVAL); - if (!is_power_of_2(alg->cra_blocksize)) - goto out_put_alg; + spawn = skcipher_instance_ctx(inst); + err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst), + CRYPTO_ALG_TYPE_MASK); + crypto_mod_put(alg); + if (err) + goto err_free_inst; - inst = crypto_alloc_instance("cbc", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = 
crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg); + if (err) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + err = -EINVAL; + if (!is_power_of_2(alg->cra_blocksize)) + goto err_drop_spawn; + + inst->alg.base.cra_priority = alg->cra_priority; + inst->alg.base.cra_blocksize = alg->cra_blocksize; + inst->alg.base.cra_alignmask = alg->cra_alignmask; /* We access the data as u32s when xoring. */ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + inst->alg.ivsize = alg->cra_blocksize; + inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_cbc_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_cbc_ctx); - inst->alg.cra_init = crypto_cbc_init_tfm; - inst->alg.cra_exit = crypto_cbc_exit_tfm; + inst->alg.init = crypto_cbc_init_tfm; + inst->alg.exit = crypto_cbc_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_cbc_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_cbc_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_cbc_decrypt; + inst->alg.setkey = crypto_cbc_setkey; + inst->alg.encrypt = crypto_cbc_encrypt; + inst->alg.decrypt = crypto_cbc_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_cbc_free; -static void crypto_cbc_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_spawn(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_cbc_tmpl = { .name = "cbc", - .alloc = crypto_cbc_alloc, - .free = crypto_cbc_free, + .create = crypto_cbc_create, .module = THIS_MODULE, }; diff --git a/crypto/ccm.c b/crypto/ccm.c index 006d857..26b924d 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -462,7 +462,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_cipher; @@ -544,9 +544,9 @@ static int crypto_ccm_create_common(struct crypto_template *tmpl, goto err_free_inst; crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ictx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_cipher; diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c index e899ef5..db1bc31 100644 --- a/crypto/chacha20poly1305.c +++ b/crypto/chacha20poly1305.c @@ -532,7 +532,7 @@ static int chachapoly_init(struct crypto_aead *tfm) if (IS_ERR(poly)) return PTR_ERR(poly); - chacha = crypto_spawn_skcipher2(&ictx->chacha); + chacha = crypto_spawn_skcipher(&ictx->chacha); if (IS_ERR(chacha)) { crypto_free_ahash(poly); return PTR_ERR(chacha); @@ -625,9 +625,9 @@ static int chachapoly_create(struct crypto_template *tmpl, 
struct rtattr **tb, goto err_free_inst; crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->chacha, chacha_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_poly; diff --git a/crypto/cipher.c b/crypto/cipher.c index 39541e0..94fa355 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -116,7 +116,3 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_cipher_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/cmac.c b/crypto/cmac.c index 7a8bfbd..04080dc 100644 --- a/crypto/cmac.c +++ b/crypto/cmac.c @@ -57,7 +57,8 @@ static int crypto_cmac_digest_setkey(struct crypto_shash *parent, unsigned long alignmask = crypto_shash_alignmask(parent); struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent); unsigned int bs = crypto_shash_blocksize(parent); - __be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); + __be64 *consts = PTR_ALIGN((void *)ctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u64 _const[2]; int i, err = 0; u8 msb_mask, gfmask; @@ -173,7 +174,8 @@ static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out) struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_blocksize(parent); - u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1); + u8 *consts = PTR_ALIGN((void *)tctx->ctx, + (alignmask | (__alignof__(__be64) - 1)) + 1); u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1); u8 *prev = odds + bs; unsigned int offset = 0; @@ -243,6 +245,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) case 8: break; default: + err = -EINVAL; goto out_put_alg; } @@ -257,7 +260,8 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) if (err) goto out_free_inst; - alignmask = alg->cra_alignmask | (sizeof(long) - 1); + /* We access the data as u32s when xoring. 
*/ + alignmask = alg->cra_alignmask | (__alignof__(u32) - 1); inst->alg.base.cra_alignmask = alignmask; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; @@ -269,7 +273,9 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb) + alg->cra_blocksize * 2; inst->alg.base.cra_ctxsize = - ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1) + ALIGN(sizeof(struct cmac_tfm_ctx), crypto_tfm_ctx_alignment()) + + ((alignmask | (__alignof__(__be64) - 1)) & + ~(crypto_tfm_ctx_alignment() - 1)) + alg->cra_blocksize * 2; inst->alg.base.cra_init = cmac_init_tfm; diff --git a/crypto/compress.c b/crypto/compress.c index c33f076..f2d5229 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -42,7 +42,3 @@ int crypto_init_compress_ops(struct crypto_tfm *tfm) return 0; } - -void crypto_exit_compress_ops(struct crypto_tfm *tfm) -{ -} diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 0c654e5..0508c48 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -17,9 +17,9 @@ * */ -#include <crypto/algapi.h> #include <crypto/internal/hash.h> #include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> #include <crypto/cryptd.h> #include <crypto/crypto_wq.h> #include <linux/atomic.h> @@ -48,6 +48,11 @@ struct cryptd_instance_ctx { struct cryptd_queue *queue; }; +struct skcipherd_instance_ctx { + struct crypto_skcipher_spawn spawn; + struct cryptd_queue *queue; +}; + struct hashd_instance_ctx { struct crypto_shash_spawn spawn; struct cryptd_queue *queue; @@ -67,6 +72,15 @@ struct cryptd_blkcipher_request_ctx { crypto_completion_t complete; }; +struct cryptd_skcipher_ctx { + atomic_t refcnt; + struct crypto_skcipher *child; +}; + +struct cryptd_skcipher_request_ctx { + crypto_completion_t complete; +}; + struct cryptd_hash_ctx { atomic_t refcnt; struct crypto_shash *child; @@ -122,7 +136,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, { int cpu, err; struct cryptd_cpu_queue *cpu_queue; - struct crypto_tfm *tfm; atomic_t *refcnt; bool may_backlog; @@ -141,7 +154,6 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, if (!atomic_read(refcnt)) goto out_put_cpu; - tfm = request->tfm; atomic_inc(refcnt); out_put_cpu: @@ -432,6 +444,216 @@ out_put_alg: return err; } +static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, + const u8 *key, unsigned int keylen) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; + int err; + + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static void cryptd_skcipher_complete(struct skcipher_request *req, int err) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + int refcnt = atomic_read(&ctx->refcnt); + + local_bh_disable(); + rctx->complete(&req->base, err); + local_bh_enable(); + + if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt)) + crypto_free_skcipher(tfm); +} + +static void cryptd_skcipher_encrypt(struct crypto_async_request *base, + int err) +{ + struct skcipher_request *req = skcipher_request_cast(base); + struct cryptd_skcipher_request_ctx *rctx = 
skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + SKCIPHER_REQUEST_ON_STACK(subreq, child); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); + + req->base.complete = rctx->complete; + +out: + cryptd_skcipher_complete(req, err); +} + +static void cryptd_skcipher_decrypt(struct crypto_async_request *base, + int err) +{ + struct skcipher_request *req = skcipher_request_cast(base); + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = ctx->child; + SKCIPHER_REQUEST_ON_STACK(subreq, child); + + if (unlikely(err == -EINPROGRESS)) + goto out; + + skcipher_request_set_tfm(subreq, child); + skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, + req->iv); + + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); + + req->base.complete = rctx->complete; + +out: + cryptd_skcipher_complete(req, err); +} + +static int cryptd_skcipher_enqueue(struct skcipher_request *req, + crypto_completion_t compl) +{ + struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cryptd_queue *queue; + + queue = cryptd_get_queue(crypto_skcipher_tfm(tfm)); + rctx->complete = req->base.complete; + req->base.complete = compl; + + return cryptd_enqueue_request(queue, &req->base); +} + +static int cryptd_skcipher_encrypt_enqueue(struct skcipher_request *req) +{ + return cryptd_skcipher_enqueue(req, cryptd_skcipher_encrypt); +} + +static int cryptd_skcipher_decrypt_enqueue(struct skcipher_request *req) +{ + return cryptd_skcipher_enqueue(req, cryptd_skcipher_decrypt); +} + +static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct skcipherd_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct crypto_skcipher_spawn *spawn = &ictx->spawn; + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; + + cipher = crypto_spawn_skcipher(spawn); + if (IS_ERR(cipher)) + return PTR_ERR(cipher); + + ctx->child = cipher; + crypto_skcipher_set_reqsize( + tfm, sizeof(struct cryptd_skcipher_request_ctx)); + return 0; +} + +static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); +} + +static void cryptd_skcipher_free(struct skcipher_instance *inst) +{ + struct skcipherd_instance_ctx *ctx = skcipher_instance_ctx(inst); + + crypto_drop_skcipher(&ctx->spawn); +} + +static int cryptd_create_skcipher(struct crypto_template *tmpl, + struct rtattr **tb, + struct cryptd_queue *queue) +{ + struct skcipherd_instance_ctx *ctx; + struct skcipher_instance *inst; + struct skcipher_alg *alg; + const char *name; + u32 type; + u32 mask; + int err; + + type = 0; + mask = CRYPTO_ALG_ASYNC; + + cryptd_check_internal(tb, &type, &mask); + 
+ name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(name)) + return PTR_ERR(name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = skcipher_instance_ctx(inst); + ctx->queue = queue; + + crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(&ctx->spawn, name, type, mask); + if (err) + goto out_free_inst; + + alg = crypto_spawn_skcipher_alg(&ctx->spawn); + err = cryptd_init_instance(skcipher_crypto_instance(inst), &alg->base); + if (err) + goto out_drop_skcipher; + + inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC | + (alg->base.cra_flags & CRYPTO_ALG_INTERNAL); + + inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg); + inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg); + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); + + inst->alg.base.cra_ctxsize = sizeof(struct cryptd_skcipher_ctx); + + inst->alg.init = cryptd_skcipher_init_tfm; + inst->alg.exit = cryptd_skcipher_exit_tfm; + + inst->alg.setkey = cryptd_skcipher_setkey; + inst->alg.encrypt = cryptd_skcipher_encrypt_enqueue; + inst->alg.decrypt = cryptd_skcipher_decrypt_enqueue; + + inst->free = cryptd_skcipher_free; + + err = skcipher_register_instance(tmpl, inst); + if (err) { +out_drop_skcipher: + crypto_drop_skcipher(&ctx->spawn); +out_free_inst: + kfree(inst); + } + return err; +} + static int cryptd_hash_init_tfm(struct crypto_tfm *tfm) { struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); @@ -895,7 +1117,11 @@ static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_BLKCIPHER: - return cryptd_create_blkcipher(tmpl, tb, &queue); + if ((algt->type & CRYPTO_ALG_TYPE_MASK) == + CRYPTO_ALG_TYPE_BLKCIPHER) + return cryptd_create_blkcipher(tmpl, tb, &queue); + + return cryptd_create_skcipher(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_DIGEST: return cryptd_create_hash(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_AEAD: @@ -985,6 +1211,58 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm) } EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher); +struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name, + u32 type, u32 mask) +{ + char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct cryptd_skcipher_ctx *ctx; + struct crypto_skcipher *tfm; + + if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, + "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-EINVAL); + + tfm = crypto_alloc_skcipher(cryptd_alg_name, type, mask); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { + crypto_free_skcipher(tfm); + return ERR_PTR(-EINVAL); + } + + ctx = crypto_skcipher_ctx(tfm); + atomic_set(&ctx->refcnt, 1); + + return container_of(tfm, struct cryptd_skcipher, base); +} +EXPORT_SYMBOL_GPL(cryptd_alloc_skcipher); + +struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + return ctx->child; +} +EXPORT_SYMBOL_GPL(cryptd_skcipher_child); + +bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + return atomic_read(&ctx->refcnt) - 1; +} +EXPORT_SYMBOL_GPL(cryptd_skcipher_queued); + +void cryptd_free_skcipher(struct cryptd_skcipher *tfm) +{ + struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); + + if (atomic_dec_and_test(&ctx->refcnt)) + 
crypto_free_skcipher(&tfm->base); +} +EXPORT_SYMBOL_GPL(cryptd_free_skcipher); + struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, u32 type, u32 mask) { diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 6989ba0..f1bf341 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -47,7 +47,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, /* If another context is idling then defer */ if (engine->idling) { - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); goto out; } @@ -58,7 +58,7 @@ static void crypto_pump_requests(struct crypto_engine *engine, /* Only do teardown in the thread */ if (!in_kthread) { - kthread_queue_work(&engine->kworker, + kthread_queue_work(engine->kworker, &engine->pump_requests); goto out; } @@ -189,7 +189,7 @@ int crypto_transfer_cipher_request(struct crypto_engine *engine, ret = ablkcipher_enqueue_request(&engine->queue, req); if (!engine->busy && need_pump) - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); spin_unlock_irqrestore(&engine->queue_lock, flags); return ret; @@ -231,7 +231,7 @@ int crypto_transfer_hash_request(struct crypto_engine *engine, ret = ahash_enqueue_request(&engine->queue, req); if (!engine->busy && need_pump) - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); spin_unlock_irqrestore(&engine->queue_lock, flags); return ret; @@ -284,7 +284,7 @@ void crypto_finalize_cipher_request(struct crypto_engine *engine, req->base.complete(&req->base, err); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); } EXPORT_SYMBOL_GPL(crypto_finalize_cipher_request); @@ -321,7 +321,7 @@ void crypto_finalize_hash_request(struct crypto_engine *engine, req->base.complete(&req->base, err); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); } EXPORT_SYMBOL_GPL(crypto_finalize_hash_request); @@ -345,7 +345,7 @@ int crypto_engine_start(struct crypto_engine *engine) engine->running = true; spin_unlock_irqrestore(&engine->queue_lock, flags); - kthread_queue_work(&engine->kworker, &engine->pump_requests); + kthread_queue_work(engine->kworker, &engine->pump_requests); return 0; } @@ -422,11 +422,8 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN); spin_lock_init(&engine->queue_lock); - kthread_init_worker(&engine->kworker); - engine->kworker_task = kthread_run(kthread_worker_fn, - &engine->kworker, "%s", - engine->name); - if (IS_ERR(engine->kworker_task)) { + engine->kworker = kthread_create_worker(0, "%s", engine->name); + if (IS_ERR(engine->kworker)) { dev_err(dev, "failed to create crypto request pump task\n"); return NULL; } @@ -434,7 +431,7 @@ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) if (engine->rt) { dev_info(dev, "will run requests pump with realtime priority\n"); - sched_setscheduler(engine->kworker_task, SCHED_FIFO, &param); + sched_setscheduler(engine->kworker->task, SCHED_FIFO, &param); } return engine; @@ -455,8 +452,7 @@ int crypto_engine_exit(struct crypto_engine *engine) if (ret) return ret; - kthread_flush_worker(&engine->kworker); - kthread_stop(engine->kworker_task); + kthread_destroy_worker(engine->kworker); return 0;
} diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 1c57054..a90404a 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -112,6 +112,21 @@ nla_put_failure: return -EMSGSIZE; } +static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_acomp racomp; + + strncpy(racomp.type, "acomp", sizeof(racomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, + sizeof(struct crypto_report_acomp), &racomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; @@ -186,7 +201,11 @@ static int crypto_report_one(struct crypto_alg *alg, goto nla_put_failure; break; + case CRYPTO_ALG_TYPE_ACOMPRESS: + if (crypto_report_acomp(skb, alg)) + goto nla_put_failure; + break; case CRYPTO_ALG_TYPE_AKCIPHER: if (crypto_report_akcipher(skb, alg)) goto nla_put_failure; diff --git a/crypto/ctr.c b/crypto/ctr.c index ff4d21e..a9a7a44 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -312,7 +312,7 @@ static int crypto_rfc3686_init_tfm(struct crypto_skcipher *tfm) unsigned long align; unsigned int reqsize; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -370,9 +370,9 @@ static int crypto_rfc3686_create(struct crypto_template *tmpl, spawn = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher2(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; diff --git a/crypto/cts.c b/crypto/cts.c index 5197618..00254d7 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -290,7 +290,7 @@ static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) unsigned bsize; unsigned align; - cipher = crypto_spawn_skcipher2(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); @@ -348,9 +348,9 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) spawn = skcipher_instance_ctx(inst); crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher2(spawn, cipher_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_free_inst; diff --git a/crypto/deflate.c b/crypto/deflate.c index 95d8d37..f942cb3 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -32,6 +32,7 @@ #include <linux/interrupt.h> #include <linux/mm.h> #include <linux/net.h> +#include <crypto/internal/scompress.h> #define DEFLATE_DEF_LEVEL Z_DEFAULT_COMPRESSION #define DEFLATE_DEF_WINBITS 11 @@ -101,9 +102,8 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx) vfree(ctx->decomp_stream.workspace); } -static int deflate_init(struct crypto_tfm *tfm) +static int __deflate_init(void *ctx) { - struct deflate_ctx *ctx = crypto_tfm_ctx(tfm); int ret; ret = deflate_comp_init(ctx); @@ -116,19 +116,55 @@ out: return ret; } -static void deflate_exit(struct crypto_tfm *tfm) +static void *deflate_alloc_ctx(struct crypto_scomp *tfm) +{ + struct deflate_ctx *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + ret = __deflate_init(ctx); + if (ret) { + kfree(ctx); + return ERR_PTR(ret); + } + + return ctx; +} + 
+static int deflate_init(struct crypto_tfm *tfm) { struct deflate_ctx *ctx = crypto_tfm_ctx(tfm); + return __deflate_init(ctx); +} + +static void __deflate_exit(void *ctx) +{ deflate_comp_exit(ctx); deflate_decomp_exit(ctx); } -static int deflate_compress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static void deflate_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + __deflate_exit(ctx); + kzfree(ctx); +} + +static void deflate_exit(struct crypto_tfm *tfm) +{ + struct deflate_ctx *ctx = crypto_tfm_ctx(tfm); + + __deflate_exit(ctx); +} + +static int __deflate_compress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { int ret = 0; - struct deflate_ctx *dctx = crypto_tfm_ctx(tfm); + struct deflate_ctx *dctx = ctx; struct z_stream_s *stream = &dctx->comp_stream; ret = zlib_deflateReset(stream); @@ -153,12 +189,27 @@ out: return ret; } -static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int deflate_compress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct deflate_ctx *dctx = crypto_tfm_ctx(tfm); + + return __deflate_compress(src, slen, dst, dlen, dctx); +} + +static int deflate_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __deflate_compress(src, slen, dst, dlen, ctx); +} + +static int __deflate_decompress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { int ret = 0; - struct deflate_ctx *dctx = crypto_tfm_ctx(tfm); + struct deflate_ctx *dctx = ctx; struct z_stream_s *stream = &dctx->decomp_stream; ret = zlib_inflateReset(stream); @@ -194,6 +245,21 @@ out: return ret; } +static int deflate_decompress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct deflate_ctx *dctx = crypto_tfm_ctx(tfm); + + return __deflate_decompress(src, slen, dst, dlen, dctx); +} + +static int deflate_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __deflate_decompress(src, slen, dst, dlen, ctx); +} + static struct crypto_alg alg = { .cra_name = "deflate", .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, @@ -206,14 +272,39 @@ static struct crypto_alg alg = { .coa_decompress = deflate_decompress } } }; +static struct scomp_alg scomp = { + .alloc_ctx = deflate_alloc_ctx, + .free_ctx = deflate_free_ctx, + .compress = deflate_scompress, + .decompress = deflate_sdecompress, + .base = { + .cra_name = "deflate", + .cra_driver_name = "deflate-scomp", + .cra_module = THIS_MODULE, + } +}; + static int __init deflate_mod_init(void) { - return crypto_register_alg(&alg); + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg); + return ret; + } + + return ret; } static void __exit deflate_mod_fini(void) { crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); } module_init(deflate_mod_init); diff --git a/crypto/dh.c b/crypto/dh.c index 9d19360..ddcb528 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -118,7 +118,7 @@ static int dh_compute_value(struct kpp_request *req) if (req->src) { base = mpi_read_raw_from_sgl(req->src, req->src_len); if (!base) { - ret = EINVAL; + ret = -EINVAL; goto err_free_val; } } else { diff --git a/crypto/drbg.c b/crypto/drbg.c index 053035b..8a4d98b 100644 --- a/crypto/drbg.c 
+++ b/crypto/drbg.c @@ -1782,6 +1782,7 @@ static int drbg_kcapi_sym_ctr(struct drbg_state *drbg, memcpy(outbuf, drbg->outscratchpad, cryptlen); outlen -= cryptlen; + outbuf += cryptlen; } ret = 0; diff --git a/crypto/gcm.c b/crypto/gcm.c index f624ac9..b7ad808 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -575,7 +575,7 @@ static int crypto_gcm_init_tfm(struct crypto_aead *tfm) if (IS_ERR(ghash)) return PTR_ERR(ghash); - ctr = crypto_spawn_skcipher2(&ictx->ctr); + ctr = crypto_spawn_skcipher(&ictx->ctr); err = PTR_ERR(ctr); if (IS_ERR(ctr)) goto err_free_hash; @@ -663,20 +663,20 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl, goto err_drop_ghash; crypto_set_skcipher_spawn(&ctx->ctr, aead_crypto_instance(inst)); - err = crypto_grab_skcipher2(&ctx->ctr, ctr_name, 0, - crypto_requires_sync(algt->type, - algt->mask)); + err = crypto_grab_skcipher(&ctx->ctr, ctr_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); if (err) goto err_drop_ghash; ctr = crypto_spawn_skcipher_alg(&ctx->ctr); /* We only support 16-byte blocks. */ + err = -EINVAL; if (crypto_skcipher_alg_ivsize(ctr) != 16) goto out_put_ctr; /* Not a stream cipher? */ - err = -EINVAL; if (ctr->base.cra_blocksize != 1) goto out_put_ctr; diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c index 5276607..72015fe 100644 --- a/crypto/gf128mul.c +++ b/crypto/gf128mul.c @@ -263,48 +263,6 @@ EXPORT_SYMBOL(gf128mul_bbe); * t[1][BYTE] contains g*x^8*BYTE * .. * t[15][BYTE] contains g*x^120*BYTE */ -struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g) -{ - struct gf128mul_64k *t; - int i, j, k; - - t = kzalloc(sizeof(*t), GFP_KERNEL); - if (!t) - goto out; - - for (i = 0; i < 16; i++) { - t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL); - if (!t->t[i]) { - gf128mul_free_64k(t); - t = NULL; - goto out; - } - } - - t->t[0]->t[128] = *g; - for (j = 64; j > 0; j >>= 1) - gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]); - - for (i = 0;;) { - for (j = 2; j < 256; j += j) - for (k = 1; k < j; ++k) - be128_xor(&t->t[i]->t[j + k], - &t->t[i]->t[j], &t->t[i]->t[k]); - - if (++i >= 16) - break; - - for (j = 128; j > 0; j >>= 1) { - t->t[i]->t[j] = t->t[i - 1]->t[j]; - gf128mul_x8_lle(&t->t[i]->t[j]); - } - } - -out: - return t; -} -EXPORT_SYMBOL(gf128mul_init_64k_lle); - struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g) { struct gf128mul_64k *t; @@ -352,24 +310,11 @@ void gf128mul_free_64k(struct gf128mul_64k *t) int i; for (i = 0; i < 16; i++) - kfree(t->t[i]); - kfree(t); + kzfree(t->t[i]); + kzfree(t); } EXPORT_SYMBOL(gf128mul_free_64k); -void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t) -{ - u8 *ap = (u8 *)a; - be128 r[1]; - int i; - - *r = t->t[0]->t[ap[0]]; - for (i = 1; i < 16; ++i) - be128_xor(r, r, &t->t[i]->t[ap[i]]); - *a = *r; -} -EXPORT_SYMBOL(gf128mul_64k_lle); - void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t) { u8 *ap = (u8 *)a; diff --git a/crypto/internal.h b/crypto/internal.h index 7eefcdb..f073204 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -76,9 +76,6 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask); int crypto_init_cipher_ops(struct crypto_tfm *tfm); int crypto_init_compress_ops(struct crypto_tfm *tfm); -void crypto_exit_cipher_ops(struct crypto_tfm *tfm); -void crypto_exit_compress_ops(struct crypto_tfm *tfm); - struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask); void crypto_larval_kill(struct crypto_alg *alg); struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask); diff --git 
a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index c4938497..787dccc 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -39,7 +39,6 @@ #include <linux/module.h> #include <linux/slab.h> -#include <linux/module.h> #include <linux/fips.h> #include <linux/time.h> #include <linux/crypto.h> diff --git a/crypto/lrw.c b/crypto/lrw.c index 6f9908a..ecd8474 100644 --- a/crypto/lrw.c +++ b/crypto/lrw.c @@ -17,7 +17,8 @@ * * The test vectors are included in the testing module tcrypt.[ch] */ -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -29,11 +30,30 @@ #include <crypto/gf128mul.h> #include <crypto/lrw.h> +#define LRW_BUFFER_SIZE 128u + struct priv { - struct crypto_cipher *child; + struct crypto_skcipher *child; struct lrw_table_ctx table; }; +struct rctx { + be128 buf[LRW_BUFFER_SIZE / sizeof(be128)]; + + be128 t; + + be128 *ext; + + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct scatterlist *src; + struct scatterlist *dst; + + unsigned int left; + + struct skcipher_request subreq; +}; + static inline void setbit128_bbe(void *b, int bit) { __set_bit(bit ^ (0x80 - @@ -76,32 +96,26 @@ void lrw_free_table(struct lrw_table_ctx *ctx) } EXPORT_SYMBOL_GPL(lrw_free_table); -static int setkey(struct crypto_tfm *parent, const u8 *key, +static int setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct priv *ctx = crypto_tfm_ctx(parent); - struct crypto_cipher *child = ctx->child; + struct priv *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child = ctx->child; int err, bsize = LRW_BLOCK_SIZE; const u8 *tweak = key + keylen - bsize; - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key, keylen - bsize); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen - bsize); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); if (err) return err; - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); return lrw_init_table(&ctx->table, tweak); } -struct sinfo { - be128 t; - struct crypto_tfm *tfm; - void (*fn)(struct crypto_tfm *, u8 *, const u8 *); -}; - static inline void inc(be128 *iv) { be64_add_cpu(&iv->b, 1); @@ -109,13 +123,6 @@ static inline void inc(be128 *iv) be64_add_cpu(&iv->a, 1); } -static inline void lrw_round(struct sinfo *s, void *dst, const void *src) -{ - be128_xor(dst, &s->t, src); /* PP <- T xor P */ - s->fn(s->tfm, dst, dst); /* CC <- E(Key2,PP) */ - be128_xor(dst, dst, &s->t); /* C <- T xor CC */ -} - /* this returns the number of consequative 1 bits starting * from the right, get_index128(00 00 00 00 00 00 ... 
00 00 10 FB) = 2 */ static inline int get_index128(be128 *block) @@ -135,83 +142,263 @@ static inline int get_index128(be128 *block) return x; } -static int crypt(struct blkcipher_desc *d, - struct blkcipher_walk *w, struct priv *ctx, - void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +static int post_crypt(struct skcipher_request *req) { + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; + const int bs = LRW_BLOCK_SIZE; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned offset; int err; - unsigned int avail; + + subreq = &rctx->subreq; + err = skcipher_walk_virt(&w, subreq, false); + + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wdst; + + wdst = w.dst.virt.addr; + + do { + be128_xor(wdst, buf++, wdst); + wdst++; + } while ((avail -= bs) >= bs); + + err = skcipher_walk_done(&w, avail); + } + + rctx->left -= subreq->cryptlen; + + if (err || !rctx->left) + goto out; + + rctx->dst = rctx->dstbuf; + + scatterwalk_done(&w.out, 0, 1); + sg = w.out.sg; + offset = w.out.offset; + + if (rctx->dst != sg) { + rctx->dst[0] = *sg; + sg_unmark_end(rctx->dst); + scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2); + } + rctx->dst[0].length -= offset - sg->offset; + rctx->dst[0].offset = offset; + +out: + return err; +} + +static int pre_crypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct rctx *rctx = skcipher_request_ctx(req); + struct priv *ctx = crypto_skcipher_ctx(tfm); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; const int bs = LRW_BLOCK_SIZE; - struct sinfo s = { - .tfm = crypto_cipher_tfm(ctx->child), - .fn = fn - }; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned cryptlen; + unsigned offset; be128 *iv; - u8 *wsrc; - u8 *wdst; + bool more; + int err; - err = blkcipher_walk_virt(d, w); - if (!(avail = w->nbytes)) - return err; + subreq = &rctx->subreq; + skcipher_request_set_tfm(subreq, tfm); - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + cryptlen = subreq->cryptlen; + more = rctx->left > cryptlen; + if (!more) + cryptlen = rctx->left; - /* calculate first value of T */ - iv = (be128 *)w->iv; - s.t = *iv; + skcipher_request_set_crypt(subreq, rctx->src, rctx->dst, + cryptlen, req->iv); - /* T <- I*Key2 */ - gf128mul_64k_bbe(&s.t, ctx->table.table); + err = skcipher_walk_virt(&w, subreq, false); + iv = w.iv; - goto first; + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wsrc; + be128 *wdst; + + wsrc = w.src.virt.addr; + wdst = w.dst.virt.addr; - for (;;) { do { + *buf++ = rctx->t; + be128_xor(wdst++, &rctx->t, wsrc++); + /* T <- I*Key2, using the optimization * discussed in the specification */ - be128_xor(&s.t, &s.t, + be128_xor(&rctx->t, &rctx->t, &ctx->table.mulinc[get_index128(iv)]); inc(iv); + } while ((avail -= bs) >= bs); -first: - lrw_round(&s, wdst, wsrc); + err = skcipher_walk_done(&w, avail); + } - wsrc += bs; - wdst += bs; - } while ((avail -= bs) >= bs); + skcipher_request_set_tfm(subreq, ctx->child); + skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst, + cryptlen, NULL); - err = blkcipher_walk_done(d, w, avail); - if (!(avail = w->nbytes)) - break; + if (err || !more) + goto out; + + rctx->src = rctx->srcbuf; + + scatterwalk_done(&w.in, 0, 1); + sg = w.in.sg; + offset = w.in.offset; + + if (rctx->src != sg) { + rctx->src[0] = *sg; + sg_unmark_end(rctx->src); + scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2); + } + rctx->src[0].length -= offset - 
sg->offset; + rctx->src[0].offset = offset; + +out: + return err; +} + +static int init_crypt(struct skcipher_request *req, crypto_completion_t done) +{ + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + gfp_t gfp; + + subreq = &rctx->subreq; + skcipher_request_set_callback(subreq, req->base.flags, done, req); + + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + rctx->ext = NULL; + + subreq->cryptlen = LRW_BUFFER_SIZE; + if (req->cryptlen > LRW_BUFFER_SIZE) { + subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); + rctx->ext = kmalloc(subreq->cryptlen, gfp); + } + + rctx->src = req->src; + rctx->dst = req->dst; + rctx->left = req->cryptlen; + + /* calculate first value of T */ + memcpy(&rctx->t, req->iv, sizeof(rctx->t)); + + /* T <- I*Key2 */ + gf128mul_64k_bbe(&rctx->t, ctx->table.table); - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + return 0; +} + +static void exit_crypt(struct skcipher_request *req) +{ + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->left = 0; + + if (rctx->ext) + kfree(rctx->ext); +} + +static int do_encrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_encrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; } + exit_crypt(req); return err; } -static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_encrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int encrypt(struct skcipher_request *req) +{ + return do_encrypt(req, init_crypt(req, encrypt_done)); +} + +static int do_decrypt(struct skcipher_request *req, int err) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_decrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, - crypto_cipher_alg(ctx->child)->cia_encrypt); + exit_crypt(req); + return err; } -static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void decrypt_done(struct crypto_async_request *areq, int err) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_decrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; - blkcipher_walk_init(&w, dst, src, nbytes); - return 
crypt(desc, &w, ctx, - crypto_cipher_alg(ctx->child)->cia_decrypt); + skcipher_request_complete(req, err); +} + +static int decrypt(struct skcipher_request *req) +{ + return do_decrypt(req, init_crypt(req, decrypt_done)); } int lrw_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, @@ -293,95 +480,161 @@ first: } EXPORT_SYMBOL_GPL(lrw_crypt); -static int init_tfm(struct crypto_tfm *tfm) +static int init_tfm(struct crypto_skcipher *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct priv *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); + struct priv *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *cipher; - cipher = crypto_spawn_cipher(spawn); + cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); - if (crypto_cipher_blocksize(cipher) != LRW_BLOCK_SIZE) { - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - crypto_free_cipher(cipher); - return -EINVAL; - } - ctx->child = cipher; + + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(cipher) + + sizeof(struct rctx)); + return 0; } -static void exit_tfm(struct crypto_tfm *tfm) +static void exit_tfm(struct crypto_skcipher *tfm) { - struct priv *ctx = crypto_tfm_ctx(tfm); + struct priv *ctx = crypto_skcipher_ctx(tfm); lrw_free_table(&ctx->table); - crypto_free_cipher(ctx->child); + crypto_free_skcipher(ctx->child); +} + +static void free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); } -static struct crypto_instance *alloc(struct rtattr **tb) +static int create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct crypto_skcipher_spawn *spawn; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct skcipher_alg *alg; + const char *cipher_name; + char ecb_name[CRYPTO_MAX_ALG_NAME]; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + spawn = skcipher_instance_ctx(inst); + + crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + if (err == -ENOENT) { + err = -ENAMETOOLONG; + if (snprintf(ecb_name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", + cipher_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + err = crypto_grab_skcipher(spawn, ecb_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + } + if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = crypto_skcipher_spawn_alg(spawn); - inst = crypto_alloc_instance("lrw", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = -EINVAL; + if (alg->base.cra_blocksize != LRW_BLOCK_SIZE) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; + 
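For context, a minimal sketch (not part of the patch) of how a kernel caller would drive the converted lrw template through the skcipher API this file now implements. The "lrw(aes)" instantiation, the completion wrapper and the linear buffer are assumptions for illustration; error paths are trimmed and the data length must be a multiple of the 16-byte LRW block size.

#include <crypto/skcipher.h>
#include <linux/completion.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

struct lrw_wait {
	struct completion done;
	int err;
};

/* Completion callback for asynchronous providers; ignores the
 * intermediate -EINPROGRESS notification used for backlogged requests. */
static void lrw_op_done(struct crypto_async_request *areq, int err)
{
	struct lrw_wait *w = areq->data;

	if (err == -EINPROGRESS)
		return;
	w->err = err;
	complete(&w->done);
}

static int lrw_encrypt_buf(u8 *buf, unsigned int len,
			   const u8 *key, unsigned int keylen, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	struct lrw_wait w;
	int err;

	tfm = crypto_alloc_skcipher("lrw(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* e.g. 48 bytes for AES-256: 32-byte cipher key followed by the
	 * 16-byte tweak key consumed by setkey() above. */
	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	init_completion(&w.done);
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
				      CRYPTO_TFM_REQ_MAY_SLEEP,
				      lrw_op_done, &w);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);	/* 16-byte IV */

	err = crypto_skcipher_encrypt(req);
	if (err == -EINPROGRESS || err == -EBUSY) {
		wait_for_completion(&w.done);
		err = w.err;
	}

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}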
if (crypto_skcipher_alg_ivsize(alg)) + goto err_drop_spawn; - if (alg->cra_alignmask < 7) inst->alg.cra_alignmask = 7; - else inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "lrw", + &alg->base); + if (err) + goto err_drop_spawn; - if (!(alg->cra_blocksize % 4)) - inst->alg.cra_alignmask |= 3; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = - alg->cra_cipher.cia_min_keysize + alg->cra_blocksize; - inst->alg.cra_blkcipher.max_keysize = - alg->cra_cipher.cia_max_keysize + alg->cra_blocksize; + err = -EINVAL; + cipher_name = alg->base.cra_name; - inst->alg.cra_ctxsize = sizeof(struct priv); + /* Alas we screwed up the naming so we have to mangle the + * cipher name. + */ + if (!strncmp(cipher_name, "ecb(", 4)) { + unsigned len; - inst->alg.cra_init = init_tfm; - inst->alg.cra_exit = exit_tfm; + len = strlcpy(ecb_name, cipher_name + 4, sizeof(ecb_name)); + if (len < 2 || len >= sizeof(ecb_name)) + goto err_drop_spawn; - inst->alg.cra_blkcipher.setkey = setkey; - inst->alg.cra_blkcipher.encrypt = encrypt; - inst->alg.cra_blkcipher.decrypt = decrypt; + if (ecb_name[len - 1] != ')') + goto err_drop_spawn; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + ecb_name[len - 1] = 0; -static void free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "lrw(%s)", ecb_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + } + + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask | + (__alignof__(u64) - 1); + + inst->alg.ivsize = LRW_BLOCK_SIZE; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) + + LRW_BLOCK_SIZE; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) + + LRW_BLOCK_SIZE; + + inst->alg.base.cra_ctxsize = sizeof(struct priv); + + inst->alg.init = init_tfm; + inst->alg.exit = exit_tfm; + + inst->alg.setkey = setkey; + inst->alg.encrypt = encrypt; + inst->alg.decrypt = decrypt; + + inst->free = free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_tmpl = { .name = "lrw", - .alloc = alloc, - .free = free, + .create = create, .module = THIS_MODULE, }; diff --git a/crypto/lz4.c b/crypto/lz4.c index aefbcea..99c1b2c 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -23,36 +23,53 @@ #include <linux/crypto.h> #include <linux/vmalloc.h> #include <linux/lz4.h> +#include <crypto/internal/scompress.h> struct lz4_ctx { void *lz4_comp_mem; }; +static void *lz4_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = vmalloc(LZ4_MEM_COMPRESS); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + static int lz4_init(struct crypto_tfm *tfm) { struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS); - if (!ctx->lz4_comp_mem) + ctx->lz4_comp_mem = lz4_alloc_ctx(NULL); + if (IS_ERR(ctx->lz4_comp_mem)) return -ENOMEM; return 0; } +static void lz4_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + vfree(ctx); +} + static void lz4_exit(struct crypto_tfm *tfm) { struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); - vfree(ctx->lz4_comp_mem); + + 
lz4_free_ctx(NULL, ctx->lz4_comp_mem); } -static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int __lz4_compress_crypto(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { - struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); size_t tmp_len = *dlen; int err; - err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem); + err = lz4_compress(src, slen, dst, &tmp_len, ctx); if (err < 0) return -EINVAL; @@ -61,8 +78,23 @@ static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, return 0; } -static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int lz4_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lz4_compress_crypto(src, slen, dst, dlen, ctx); +} + +static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lz4_ctx *ctx = crypto_tfm_ctx(tfm); + + return __lz4_compress_crypto(src, slen, dst, dlen, ctx->lz4_comp_mem); +} + +static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { int err; size_t tmp_len = *dlen; @@ -76,6 +108,20 @@ static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, return err; } +static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lz4_decompress_crypto(src, slen, dst, dlen, NULL); +} + +static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, + unsigned int *dlen) +{ + return __lz4_decompress_crypto(src, slen, dst, dlen, NULL); +} + static struct crypto_alg alg_lz4 = { .cra_name = "lz4", .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, @@ -89,14 +135,39 @@ static struct crypto_alg alg_lz4 = { .coa_decompress = lz4_decompress_crypto } } }; +static struct scomp_alg scomp = { + .alloc_ctx = lz4_alloc_ctx, + .free_ctx = lz4_free_ctx, + .compress = lz4_scompress, + .decompress = lz4_sdecompress, + .base = { + .cra_name = "lz4", + .cra_driver_name = "lz4-scomp", + .cra_module = THIS_MODULE, + } +}; + static int __init lz4_mod_init(void) { - return crypto_register_alg(&alg_lz4); + int ret; + + ret = crypto_register_alg(&alg_lz4); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg_lz4); + return ret; + } + + return ret; } static void __exit lz4_mod_fini(void) { crypto_unregister_alg(&alg_lz4); + crypto_unregister_scomp(&scomp); } module_init(lz4_mod_init); diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c index a1d3b5b..75ffc4a 100644 --- a/crypto/lz4hc.c +++ b/crypto/lz4hc.c @@ -22,37 +22,53 @@ #include <linux/crypto.h> #include <linux/vmalloc.h> #include <linux/lz4.h> +#include <crypto/internal/scompress.h> struct lz4hc_ctx { void *lz4hc_comp_mem; }; +static void *lz4hc_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = vmalloc(LZ4HC_MEM_COMPRESS); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + static int lz4hc_init(struct crypto_tfm *tfm) { struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS); - if (!ctx->lz4hc_comp_mem) + ctx->lz4hc_comp_mem = lz4hc_alloc_ctx(NULL); + if (IS_ERR(ctx->lz4hc_comp_mem)) return -ENOMEM; return 0; } +static void lz4hc_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + vfree(ctx); +} + static void 
lz4hc_exit(struct crypto_tfm *tfm) { struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); - vfree(ctx->lz4hc_comp_mem); + lz4hc_free_ctx(NULL, ctx->lz4hc_comp_mem); } -static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int __lz4hc_compress_crypto(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { - struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); size_t tmp_len = *dlen; int err; - err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem); + err = lz4hc_compress(src, slen, dst, &tmp_len, ctx); if (err < 0) return -EINVAL; @@ -61,8 +77,25 @@ static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, return 0; } -static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int lz4hc_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lz4hc_compress_crypto(src, slen, dst, dlen, ctx); +} + +static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, + unsigned int *dlen) +{ + struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm); + + return __lz4hc_compress_crypto(src, slen, dst, dlen, + ctx->lz4hc_comp_mem); +} + +static int __lz4hc_decompress_crypto(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { int err; size_t tmp_len = *dlen; @@ -76,6 +109,20 @@ static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, return err; } +static int lz4hc_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL); +} + +static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, + unsigned int *dlen) +{ + return __lz4hc_decompress_crypto(src, slen, dst, dlen, NULL); +} + static struct crypto_alg alg_lz4hc = { .cra_name = "lz4hc", .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, @@ -89,14 +136,39 @@ static struct crypto_alg alg_lz4hc = { .coa_decompress = lz4hc_decompress_crypto } } }; +static struct scomp_alg scomp = { + .alloc_ctx = lz4hc_alloc_ctx, + .free_ctx = lz4hc_free_ctx, + .compress = lz4hc_scompress, + .decompress = lz4hc_sdecompress, + .base = { + .cra_name = "lz4hc", + .cra_driver_name = "lz4hc-scomp", + .cra_module = THIS_MODULE, + } +}; + static int __init lz4hc_mod_init(void) { - return crypto_register_alg(&alg_lz4hc); + int ret; + + ret = crypto_register_alg(&alg_lz4hc); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg_lz4hc); + return ret; + } + + return ret; } static void __exit lz4hc_mod_fini(void) { crypto_unregister_alg(&alg_lz4hc); + crypto_unregister_scomp(&scomp); } module_init(lz4hc_mod_init); diff --git a/crypto/lzo.c b/crypto/lzo.c index c3f3dd9..168df78 100644 --- a/crypto/lzo.c +++ b/crypto/lzo.c @@ -22,40 +22,55 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/lzo.h> +#include <crypto/internal/scompress.h> struct lzo_ctx { void *lzo_comp_mem; }; +static void *lzo_alloc_ctx(struct crypto_scomp *tfm) +{ + void *ctx; + + ctx = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN); + if (!ctx) + ctx = vmalloc(LZO1X_MEM_COMPRESS); + if (!ctx) + return ERR_PTR(-ENOMEM); + + return ctx; +} + static int lzo_init(struct crypto_tfm *tfm) { struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->lzo_comp_mem = 
kmalloc(LZO1X_MEM_COMPRESS, - GFP_KERNEL | __GFP_NOWARN); - if (!ctx->lzo_comp_mem) - ctx->lzo_comp_mem = vmalloc(LZO1X_MEM_COMPRESS); - if (!ctx->lzo_comp_mem) + ctx->lzo_comp_mem = lzo_alloc_ctx(NULL); + if (IS_ERR(ctx->lzo_comp_mem)) return -ENOMEM; return 0; } +static void lzo_free_ctx(struct crypto_scomp *tfm, void *ctx) +{ + kvfree(ctx); +} + static void lzo_exit(struct crypto_tfm *tfm) { struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); - kvfree(ctx->lzo_comp_mem); + lzo_free_ctx(NULL, ctx->lzo_comp_mem); } -static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int __lzo_compress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) { - struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ int err; - err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx->lzo_comp_mem); + err = lzo1x_1_compress(src, slen, dst, &tmp_len, ctx); if (err != LZO_E_OK) return -EINVAL; @@ -64,8 +79,23 @@ static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, return 0; } -static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int lzo_compress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + struct lzo_ctx *ctx = crypto_tfm_ctx(tfm); + + return __lzo_compress(src, slen, dst, dlen, ctx->lzo_comp_mem); +} + +static int lzo_scompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzo_compress(src, slen, dst, dlen, ctx); +} + +static int __lzo_decompress(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) { int err; size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */ @@ -77,7 +107,19 @@ static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, *dlen = tmp_len; return 0; +} +static int lzo_decompress(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen) +{ + return __lzo_decompress(src, slen, dst, dlen); +} + +static int lzo_sdecompress(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx) +{ + return __lzo_decompress(src, slen, dst, dlen); } static struct crypto_alg alg = { @@ -88,18 +130,43 @@ static struct crypto_alg alg = { .cra_init = lzo_init, .cra_exit = lzo_exit, .cra_u = { .compress = { - .coa_compress = lzo_compress, - .coa_decompress = lzo_decompress } } + .coa_compress = lzo_compress, + .coa_decompress = lzo_decompress } } +}; + +static struct scomp_alg scomp = { + .alloc_ctx = lzo_alloc_ctx, + .free_ctx = lzo_free_ctx, + .compress = lzo_scompress, + .decompress = lzo_sdecompress, + .base = { + .cra_name = "lzo", + .cra_driver_name = "lzo-scomp", + .cra_module = THIS_MODULE, + } }; static int __init lzo_mod_init(void) { - return crypto_register_alg(&alg); + int ret; + + ret = crypto_register_alg(&alg); + if (ret) + return ret; + + ret = crypto_register_scomp(&scomp); + if (ret) { + crypto_unregister_alg(&alg); + return ret; + } + + return ret; } static void __exit lzo_mod_fini(void) { crypto_unregister_alg(&alg); + crypto_unregister_scomp(&scomp); } module_init(lzo_mod_init); diff --git a/crypto/pcbc.c b/crypto/pcbc.c index f654965..e4538e0 100644 --- a/crypto/pcbc.c +++ b/crypto/pcbc.c @@ -14,40 +14,37 @@ * */ -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include 
<linux/module.h> -#include <linux/scatterlist.h> #include <linux/slab.h> struct crypto_pcbc_ctx { struct crypto_cipher *child; }; -static int crypto_pcbc_setkey(struct crypto_tfm *parent, const u8 *key, +static int crypto_pcbc_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(parent); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_cipher *child = ctx->child; int err; crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(child, key, keylen); - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); return err; } -static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_encrypt_segment(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; @@ -56,7 +53,7 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, do { crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), dst, iv); + crypto_cipher_encrypt_one(tfm, dst, iv); memcpy(iv, dst, bsize); crypto_xor(iv, src, bsize); @@ -67,12 +64,10 @@ static int crypto_pcbc_encrypt_segment(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_encrypt_inplace(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_encrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; @@ -82,7 +77,7 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, do { memcpy(tmpbuf, src, bsize); crypto_xor(iv, src, bsize); - fn(crypto_cipher_tfm(tfm), src, iv); + crypto_cipher_encrypt_one(tfm, src, iv); memcpy(iv, tmpbuf, bsize); crypto_xor(iv, src, bsize); @@ -94,38 +89,34 @@ static int crypto_pcbc_encrypt_inplace(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_pcbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *child = ctx->child; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_pcbc_encrypt_inplace(desc, &walk, + nbytes = crypto_pcbc_encrypt_inplace(req, &walk, child); else - nbytes = crypto_pcbc_encrypt_segment(desc, &walk, + nbytes = crypto_pcbc_encrypt_segment(req, &walk, 
child); - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_decrypt_segment(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; @@ -133,7 +124,7 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, u8 *iv = walk->iv; do { - fn(crypto_cipher_tfm(tfm), dst, src); + crypto_cipher_decrypt_one(tfm, dst, src); crypto_xor(dst, iv, bsize); memcpy(iv, src, bsize); crypto_xor(iv, dst, bsize); @@ -147,21 +138,19 @@ static int crypto_pcbc_decrypt_segment(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, - struct blkcipher_walk *walk, +static int crypto_pcbc_decrypt_inplace(struct skcipher_request *req, + struct skcipher_walk *walk, struct crypto_cipher *tfm) { - void (*fn)(struct crypto_tfm *, u8 *, const u8 *) = - crypto_cipher_alg(tfm)->cia_decrypt; int bsize = crypto_cipher_blocksize(tfm); unsigned int nbytes = walk->nbytes; u8 *src = walk->src.virt.addr; u8 *iv = walk->iv; - u8 tmpbuf[bsize]; + u8 tmpbuf[bsize] __attribute__ ((aligned(__alignof__(u32)))); do { memcpy(tmpbuf, src, bsize); - fn(crypto_cipher_tfm(tfm), src, src); + crypto_cipher_decrypt_one(tfm, src, src); crypto_xor(src, iv, bsize); memcpy(iv, tmpbuf, bsize); crypto_xor(iv, src, bsize); @@ -174,37 +163,35 @@ static int crypto_pcbc_decrypt_inplace(struct blkcipher_desc *desc, return nbytes; } -static int crypto_pcbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) +static int crypto_pcbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct crypto_pcbc_ctx *ctx = crypto_blkcipher_ctx(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *child = ctx->child; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { if (walk.src.virt.addr == walk.dst.virt.addr) - nbytes = crypto_pcbc_decrypt_inplace(desc, &walk, + nbytes = crypto_pcbc_decrypt_inplace(req, &walk, child); else - nbytes = crypto_pcbc_decrypt_segment(desc, &walk, + nbytes = crypto_pcbc_decrypt_segment(req, &walk, child); - err = blkcipher_walk_done(desc, &walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm) +static int crypto_pcbc_init_tfm(struct crypto_skcipher *tfm) { - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct crypto_spawn *spawn = skcipher_instance_ctx(inst); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_cipher *cipher; cipher = crypto_spawn_cipher(spawn); @@ -215,68 +202,98 @@ static int crypto_pcbc_init_tfm(struct crypto_tfm *tfm) return 0; } -static void crypto_pcbc_exit_tfm(struct 
crypto_tfm *tfm) +static void crypto_pcbc_exit_tfm(struct crypto_skcipher *tfm) { - struct crypto_pcbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_pcbc_ctx *ctx = crypto_skcipher_ctx(tfm); + crypto_free_cipher(ctx->child); } -static struct crypto_instance *crypto_pcbc_alloc(struct rtattr **tb) +static void crypto_pcbc_free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct crypto_spawn *spawn; struct crypto_alg *alg; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); - if (err) - return ERR_PTR(err); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if (((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) & + ~CRYPTO_ALG_INTERNAL) + return -EINVAL; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); + inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER | + (algt->type & CRYPTO_ALG_INTERNAL), + CRYPTO_ALG_TYPE_MASK | + (algt->mask & CRYPTO_ALG_INTERNAL)); + err = PTR_ERR(alg); if (IS_ERR(alg)) - return ERR_CAST(alg); + goto err_free_inst; + + spawn = skcipher_instance_ctx(inst); + err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst), + CRYPTO_ALG_TYPE_MASK); + crypto_mod_put(alg); + if (err) + goto err_free_inst; - inst = crypto_alloc_instance("pcbc", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg); + if (err) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_blkcipher_type; + inst->alg.base.cra_flags = alg->cra_flags & CRYPTO_ALG_INTERNAL; + inst->alg.base.cra_priority = alg->cra_priority; + inst->alg.base.cra_blocksize = alg->cra_blocksize; + inst->alg.base.cra_alignmask = alg->cra_alignmask; /* We access the data as u32s when xoring. 
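The segment and in-place helpers converted above implement the PCBC recurrence C[i] = E_K(P[i] ^ V[i]), V[i+1] = P[i] ^ C[i], with V[0] = IV. A compact standalone sketch of that recurrence (illustrative only; block_encrypt stands in for crypto_cipher_encrypt_one(), and a 16-byte maximum block size is assumed):

#include <linux/types.h>

static void pcbc_encrypt_blocks(void (*block_encrypt)(u8 *dst, const u8 *src),
				u8 *dst, const u8 *src, u8 *iv,
				unsigned int nblocks, unsigned int bsize)
{
	unsigned int i, j;

	for (i = 0; i < nblocks; i++) {
		u8 tmp[16];				/* bsize <= 16 assumed */

		for (j = 0; j < bsize; j++)
			tmp[j] = src[j] ^ iv[j];	/* P ^ V               */
		block_encrypt(dst, tmp);		/* C = E_K(P ^ V)      */
		for (j = 0; j < bsize; j++)
			iv[j] = src[j] ^ dst[j];	/* next V = P ^ C      */

		src += bsize;
		dst += bsize;
	}
}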
*/ - inst->alg.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = alg->cra_cipher.cia_max_keysize; + inst->alg.ivsize = alg->cra_blocksize; + inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize; + inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize; - inst->alg.cra_ctxsize = sizeof(struct crypto_pcbc_ctx); + inst->alg.base.cra_ctxsize = sizeof(struct crypto_pcbc_ctx); - inst->alg.cra_init = crypto_pcbc_init_tfm; - inst->alg.cra_exit = crypto_pcbc_exit_tfm; + inst->alg.init = crypto_pcbc_init_tfm; + inst->alg.exit = crypto_pcbc_exit_tfm; - inst->alg.cra_blkcipher.setkey = crypto_pcbc_setkey; - inst->alg.cra_blkcipher.encrypt = crypto_pcbc_encrypt; - inst->alg.cra_blkcipher.decrypt = crypto_pcbc_decrypt; + inst->alg.setkey = crypto_pcbc_setkey; + inst->alg.encrypt = crypto_pcbc_encrypt; + inst->alg.decrypt = crypto_pcbc_decrypt; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + inst->free = crypto_pcbc_free; -static void crypto_pcbc_free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_spawn(spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_pcbc_tmpl = { .name = "pcbc", - .alloc = crypto_pcbc_alloc, - .free = crypto_pcbc_free, + .create = crypto_pcbc_create, .module = THIS_MODULE, }; diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c index 2df9835d..b1c2d57 100644 --- a/crypto/poly1305_generic.c +++ b/crypto/poly1305_generic.c @@ -17,6 +17,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> +#include <asm/unaligned.h> static inline u64 mlt(u64 a, u64 b) { @@ -33,11 +34,6 @@ static inline u32 and(u32 v, u32 mask) return v & mask; } -static inline u32 le32_to_cpuvp(const void *p) -{ - return le32_to_cpup(p); -} - int crypto_poly1305_init(struct shash_desc *desc) { struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); @@ -65,19 +61,19 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_setkey); static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key) { /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - dctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff; - dctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03; - dctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff; - dctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff; - dctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff; + dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff; + dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03; + dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff; + dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff; + dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff; } static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key) { - dctx->s[0] = le32_to_cpuvp(key + 0); - dctx->s[1] = le32_to_cpuvp(key + 4); - dctx->s[2] = le32_to_cpuvp(key + 8); - dctx->s[3] = le32_to_cpuvp(key + 12); + dctx->s[0] = get_unaligned_le32(key + 0); + dctx->s[1] = get_unaligned_le32(key + 4); + dctx->s[2] = get_unaligned_le32(key + 8); + dctx->s[3] = get_unaligned_le32(key + 12); } unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx, @@ -137,11 +133,11 @@ static unsigned int 
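The poly1305 hunks above replace le32_to_cpup() with get_unaligned_le32() because the key and message words are loaded at byte offsets 0, 3, 6, 9 and 12, which are not naturally aligned; on strict-alignment architectures the old aligned loads were not safe. A minimal illustration (the helper name is hypothetical):

#include <asm/unaligned.h>
#include <linux/types.h>

/* byte_off may be 3, 6, 9, ... so an unaligned load helper is required */
static u32 load_key_word(const u8 *key, unsigned int byte_off)
{
	return get_unaligned_le32(key + byte_off);
}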
poly1305_blocks(struct poly1305_desc_ctx *dctx, while (likely(srclen >= POLY1305_BLOCK_SIZE)) { /* h += m[i] */ - h0 += (le32_to_cpuvp(src + 0) >> 0) & 0x3ffffff; - h1 += (le32_to_cpuvp(src + 3) >> 2) & 0x3ffffff; - h2 += (le32_to_cpuvp(src + 6) >> 4) & 0x3ffffff; - h3 += (le32_to_cpuvp(src + 9) >> 6) & 0x3ffffff; - h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit; + h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff; + h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff; + h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff; + h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff; + h4 += (get_unaligned_le32(src + 12) >> 8) | hibit; /* h *= r */ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + diff --git a/crypto/scompress.c b/crypto/scompress.c new file mode 100644 index 0000000..35e396d --- /dev/null +++ b/crypto/scompress.c @@ -0,0 +1,356 @@ +/* + * Synchronous Compression operations + * + * Copyright 2015 LG Electronics Inc. + * Copyright (c) 2016, Intel Corporation + * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <crypto/algapi.h> +#include <linux/cryptouser.h> +#include <net/netlink.h> +#include <linux/scatterlist.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/acompress.h> +#include <crypto/internal/scompress.h> +#include "internal.h" + +static const struct crypto_type crypto_scomp_type; +static void * __percpu *scomp_src_scratches; +static void * __percpu *scomp_dst_scratches; +static int scomp_scratch_users; +static DEFINE_MUTEX(scomp_lock); + +#ifdef CONFIG_NET +static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_comp rscomp; + + strncpy(rscomp.type, "scomp", sizeof(rscomp.type)); + + if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, + sizeof(struct crypto_report_comp), &rscomp)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); + +static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) +{ + seq_puts(m, "type : scomp\n"); +} + +static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) +{ + return 0; +} + +static void crypto_scomp_free_scratches(void * __percpu *scratches) +{ + int i; + + if (!scratches) + return; + + for_each_possible_cpu(i) + vfree(*per_cpu_ptr(scratches, i)); + + free_percpu(scratches); +} + +static void * __percpu *crypto_scomp_alloc_scratches(void) +{ + void * __percpu *scratches; + int i; + + scratches = alloc_percpu(void *); + if (!scratches) + return NULL; + + for_each_possible_cpu(i) { + void *scratch; + + scratch = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i)); + if (!scratch) + goto error; + *per_cpu_ptr(scratches, i) = scratch; + } + + return scratches; + +error: + crypto_scomp_free_scratches(scratches); + return NULL; +} + +static void crypto_scomp_free_all_scratches(void) +{ + if (!--scomp_scratch_users) { + 
crypto_scomp_free_scratches(scomp_src_scratches); + crypto_scomp_free_scratches(scomp_dst_scratches); + scomp_src_scratches = NULL; + scomp_dst_scratches = NULL; + } +} + +static int crypto_scomp_alloc_all_scratches(void) +{ + if (!scomp_scratch_users++) { + scomp_src_scratches = crypto_scomp_alloc_scratches(); + if (!scomp_src_scratches) + return -ENOMEM; + scomp_dst_scratches = crypto_scomp_alloc_scratches(); + if (!scomp_dst_scratches) + return -ENOMEM; + } + return 0; +} + +static void crypto_scomp_sg_free(struct scatterlist *sgl) +{ + int i, n; + struct page *page; + + if (!sgl) + return; + + n = sg_nents(sgl); + for_each_sg(sgl, sgl, n, i) { + page = sg_page(sgl); + if (page) + __free_page(page); + } + + kfree(sgl); +} + +static struct scatterlist *crypto_scomp_sg_alloc(size_t size, gfp_t gfp) +{ + struct scatterlist *sgl; + struct page *page; + int i, n; + + n = ((size - 1) >> PAGE_SHIFT) + 1; + + sgl = kmalloc_array(n, sizeof(struct scatterlist), gfp); + if (!sgl) + return NULL; + + sg_init_table(sgl, n); + + for (i = 0; i < n; i++) { + page = alloc_page(gfp); + if (!page) + goto err; + sg_set_page(sgl + i, page, PAGE_SIZE, 0); + } + + return sgl; + +err: + sg_mark_end(sgl + i); + crypto_scomp_sg_free(sgl); + return NULL; +} + +static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + void **tfm_ctx = acomp_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void **ctx = acomp_request_ctx(req); + const int cpu = get_cpu(); + u8 *scratch_src = *per_cpu_ptr(scomp_src_scratches, cpu); + u8 *scratch_dst = *per_cpu_ptr(scomp_dst_scratches, cpu); + int ret; + + if (!req->src || !req->slen || req->slen > SCOMP_SCRATCH_SIZE) { + ret = -EINVAL; + goto out; + } + + if (req->dst && !req->dlen) { + ret = -EINVAL; + goto out; + } + + if (!req->dlen || req->dlen > SCOMP_SCRATCH_SIZE) + req->dlen = SCOMP_SCRATCH_SIZE; + + scatterwalk_map_and_copy(scratch_src, req->src, 0, req->slen, 0); + if (dir) + ret = crypto_scomp_compress(scomp, scratch_src, req->slen, + scratch_dst, &req->dlen, *ctx); + else + ret = crypto_scomp_decompress(scomp, scratch_src, req->slen, + scratch_dst, &req->dlen, *ctx); + if (!ret) { + if (!req->dst) { + req->dst = crypto_scomp_sg_alloc(req->dlen, + req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
+ GFP_KERNEL : GFP_ATOMIC); + if (!req->dst) + goto out; + } + scatterwalk_map_and_copy(scratch_dst, req->dst, 0, req->dlen, + 1); + } +out: + put_cpu(); + return ret; +} + +static int scomp_acomp_compress(struct acomp_req *req) +{ + return scomp_acomp_comp_decomp(req, 1); +} + +static int scomp_acomp_decompress(struct acomp_req *req) +{ + return scomp_acomp_comp_decomp(req, 0); +} + +static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm) +{ + struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); + + crypto_free_scomp(*ctx); +} + +int crypto_init_scomp_ops_async(struct crypto_tfm *tfm) +{ + struct crypto_alg *calg = tfm->__crt_alg; + struct crypto_acomp *crt = __crypto_acomp_tfm(tfm); + struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp; + + if (!crypto_mod_get(calg)) + return -EAGAIN; + + scomp = crypto_create_tfm(calg, &crypto_scomp_type); + if (IS_ERR(scomp)) { + crypto_mod_put(calg); + return PTR_ERR(scomp); + } + + *ctx = scomp; + tfm->exit = crypto_exit_scomp_ops_async; + + crt->compress = scomp_acomp_compress; + crt->decompress = scomp_acomp_decompress; + crt->dst_free = crypto_scomp_sg_free; + crt->reqsize = sizeof(void *); + + return 0; +} + +struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void *ctx; + + ctx = crypto_scomp_alloc_ctx(scomp); + if (IS_ERR(ctx)) { + kfree(req); + return NULL; + } + + *req->__ctx = ctx; + + return req; +} + +void crypto_acomp_scomp_free_ctx(struct acomp_req *req) +{ + struct crypto_acomp *acomp = crypto_acomp_reqtfm(req); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp); + struct crypto_scomp **tfm_ctx = crypto_tfm_ctx(tfm); + struct crypto_scomp *scomp = *tfm_ctx; + void *ctx = *req->__ctx; + + if (ctx) + crypto_scomp_free_ctx(scomp, ctx); +} + +static const struct crypto_type crypto_scomp_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_scomp_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_scomp_show, +#endif + .report = crypto_scomp_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_SCOMPRESS, + .tfmsize = offsetof(struct crypto_scomp, base), +}; + +int crypto_register_scomp(struct scomp_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int ret = -ENOMEM; + + mutex_lock(&scomp_lock); + if (crypto_scomp_alloc_all_scratches()) + goto error; + + base->cra_type = &crypto_scomp_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS; + + ret = crypto_register_alg(base); + if (ret) + goto error; + + mutex_unlock(&scomp_lock); + return ret; + +error: + crypto_scomp_free_all_scratches(); + mutex_unlock(&scomp_lock); + return ret; +} +EXPORT_SYMBOL_GPL(crypto_register_scomp); + +int crypto_unregister_scomp(struct scomp_alg *alg) +{ + int ret; + + mutex_lock(&scomp_lock); + ret = crypto_unregister_alg(&alg->base); + crypto_scomp_free_all_scratches(); + mutex_unlock(&scomp_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(crypto_unregister_scomp); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Synchronous compression type"); diff --git a/crypto/simd.c b/crypto/simd.c new file mode 100644 index 0000000..8820337 --- /dev/null +++ b/crypto/simd.c @@ -0,0 +1,226 @@ +/* + * Shared crypto simd helpers + * + * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * Copyright (c) 2016 
Herbert Xu <herbert@gondor.apana.org.au> + * + * Based on aesni-intel_glue.c by: + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/preempt.h> +#include <asm/simd.h> + +struct simd_skcipher_alg { + const char *ialg_name; + struct skcipher_alg alg; +}; + +struct simd_skcipher_ctx { + struct cryptd_skcipher *cryptd_tfm; +}; + +static int simd_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child = &ctx->cryptd_tfm->base; + int err; + + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, key_len); + crypto_skcipher_set_flags(tfm, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); + return err; +} + +static int simd_skcipher_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq; + struct crypto_skcipher *child; + + subreq = skcipher_request_ctx(req); + *subreq = *req; + + if (!may_use_simd() || + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) + child = &ctx->cryptd_tfm->base; + else + child = cryptd_skcipher_child(ctx->cryptd_tfm); + + skcipher_request_set_tfm(subreq, child); + + return crypto_skcipher_encrypt(subreq); +} + +static int simd_skcipher_decrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_request *subreq; + struct crypto_skcipher *child; + + subreq = skcipher_request_ctx(req); + *subreq = *req; + + if (!may_use_simd() || + (in_atomic() && cryptd_skcipher_queued(ctx->cryptd_tfm))) + child = &ctx->cryptd_tfm->base; + else + child = cryptd_skcipher_child(ctx->cryptd_tfm); + + skcipher_request_set_tfm(subreq, child); + + return crypto_skcipher_decrypt(subreq); +} + +static void simd_skcipher_exit(struct crypto_skcipher *tfm) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + + cryptd_free_skcipher(ctx->cryptd_tfm); +} + +static int simd_skcipher_init(struct crypto_skcipher *tfm) +{ + struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); + struct cryptd_skcipher *cryptd_tfm; + struct simd_skcipher_alg *salg; + struct skcipher_alg *alg; + unsigned reqsize; + + alg = crypto_skcipher_alg(tfm); + salg = container_of(alg, struct simd_skcipher_alg, alg); + + cryptd_tfm = 
cryptd_alloc_skcipher(salg->ialg_name, + CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + ctx->cryptd_tfm = cryptd_tfm; + + reqsize = sizeof(struct skcipher_request); + reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + + crypto_skcipher_set_reqsize(tfm, reqsize); + + return 0; +} + +struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, + const char *drvname, + const char *basename) +{ + struct simd_skcipher_alg *salg; + struct crypto_skcipher *tfm; + struct skcipher_alg *ialg; + struct skcipher_alg *alg; + int err; + + tfm = crypto_alloc_skcipher(basename, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + + ialg = crypto_skcipher_alg(tfm); + + salg = kzalloc(sizeof(*salg), GFP_KERNEL); + if (!salg) { + salg = ERR_PTR(-ENOMEM); + goto out_put_tfm; + } + + salg->ialg_name = basename; + alg = &salg->alg; + + err = -ENAMETOOLONG; + if (snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", algname) >= + CRYPTO_MAX_ALG_NAME) + goto out_free_salg; + + if (snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + drvname) >= CRYPTO_MAX_ALG_NAME) + goto out_free_salg; + + alg->base.cra_flags = CRYPTO_ALG_ASYNC; + alg->base.cra_priority = ialg->base.cra_priority; + alg->base.cra_blocksize = ialg->base.cra_blocksize; + alg->base.cra_alignmask = ialg->base.cra_alignmask; + alg->base.cra_module = ialg->base.cra_module; + alg->base.cra_ctxsize = sizeof(struct simd_skcipher_ctx); + + alg->ivsize = ialg->ivsize; + alg->chunksize = ialg->chunksize; + alg->min_keysize = ialg->min_keysize; + alg->max_keysize = ialg->max_keysize; + + alg->init = simd_skcipher_init; + alg->exit = simd_skcipher_exit; + + alg->setkey = simd_skcipher_setkey; + alg->encrypt = simd_skcipher_encrypt; + alg->decrypt = simd_skcipher_decrypt; + + err = crypto_register_skcipher(alg); + if (err) + goto out_free_salg; + +out_put_tfm: + crypto_free_skcipher(tfm); + return salg; + +out_free_salg: + kfree(salg); + salg = ERR_PTR(err); + goto out_put_tfm; +} +EXPORT_SYMBOL_GPL(simd_skcipher_create_compat); + +struct simd_skcipher_alg *simd_skcipher_create(const char *algname, + const char *basename) +{ + char drvname[CRYPTO_MAX_ALG_NAME]; + + if (snprintf(drvname, CRYPTO_MAX_ALG_NAME, "simd-%s", basename) >= + CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-ENAMETOOLONG); + + return simd_skcipher_create_compat(algname, drvname, basename); +} +EXPORT_SYMBOL_GPL(simd_skcipher_create); + +void simd_skcipher_free(struct simd_skcipher_alg *salg) +{ + crypto_unregister_skcipher(&salg->alg); + kfree(salg); +} +EXPORT_SYMBOL_GPL(simd_skcipher_free); + +MODULE_LICENSE("GPL"); diff --git a/crypto/skcipher.c b/crypto/skcipher.c index f7d0018..aca07c6 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -14,9 +14,12 @@ * */ +#include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <linux/bug.h> #include <linux/cryptouser.h> +#include <linux/list.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/seq_file.h> @@ -24,6 +27,543 @@ #include "internal.h" +enum { + SKCIPHER_WALK_PHYS = 1 << 0, + SKCIPHER_WALK_SLOW = 1 << 1, + SKCIPHER_WALK_COPY = 1 << 2, + SKCIPHER_WALK_DIFF = 1 << 3, + SKCIPHER_WALK_SLEEP = 1 << 4, +}; + +struct skcipher_walk_buffer { + struct list_head entry; + struct scatter_walk dst; + unsigned int len; + u8 *data; + u8 buffer[]; +}; + +static int skcipher_walk_next(struct skcipher_walk *walk); + +static inline void 
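A brief usage sketch for the helper added above (not part of the patch): an architecture glue module that has already registered a CRYPTO_ALG_INTERNAL skcipher under a made-up driver name "__cbc-aes-foo" exposes a general-purpose "cbc(aes)" wrapper with simd_skcipher_create_compat(); requests issued from contexts where SIMD is unusable are then deferred to cryptd automatically.

#include <crypto/internal/simd.h>
#include <linux/err.h>
#include <linux/module.h>

static struct simd_skcipher_alg *cbc_aes_simd;

static int __init foo_simd_wrap_init(void)
{
	cbc_aes_simd = simd_skcipher_create_compat("cbc(aes)",	   /* cra_name          */
						   "cbc-aes-foo",  /* wrapper drv name   */
						   "__cbc-aes-foo"); /* internal alg name */
	return PTR_ERR_OR_ZERO(cbc_aes_simd);
}

static void __exit foo_simd_wrap_exit(void)
{
	simd_skcipher_free(cbc_aes_simd);
}

module_init(foo_simd_wrap_init);
module_exit(foo_simd_wrap_exit);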
skcipher_unmap(struct scatter_walk *walk, void *vaddr) +{ + if (PageHighMem(scatterwalk_page(walk))) + kunmap_atomic(vaddr); +} + +static inline void *skcipher_map(struct scatter_walk *walk) +{ + struct page *page = scatterwalk_page(walk); + + return (PageHighMem(page) ? kmap_atomic(page) : page_address(page)) + + offset_in_page(walk->offset); +} + +static inline void skcipher_map_src(struct skcipher_walk *walk) +{ + walk->src.virt.addr = skcipher_map(&walk->in); +} + +static inline void skcipher_map_dst(struct skcipher_walk *walk) +{ + walk->dst.virt.addr = skcipher_map(&walk->out); +} + +static inline void skcipher_unmap_src(struct skcipher_walk *walk) +{ + skcipher_unmap(&walk->in, walk->src.virt.addr); +} + +static inline void skcipher_unmap_dst(struct skcipher_walk *walk) +{ + skcipher_unmap(&walk->out, walk->dst.virt.addr); +} + +static inline gfp_t skcipher_walk_gfp(struct skcipher_walk *walk) +{ + return walk->flags & SKCIPHER_WALK_SLEEP ? GFP_KERNEL : GFP_ATOMIC; +} + +/* Get a spot of the specified length that does not straddle a page. + * The caller needs to ensure that there is enough space for this operation. + */ +static inline u8 *skcipher_get_spot(u8 *start, unsigned int len) +{ + u8 *end_page = (u8 *)(((unsigned long)(start + len - 1)) & PAGE_MASK); + + return max(start, end_page); +} + +static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) +{ + u8 *addr; + + addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); + addr = skcipher_get_spot(addr, bsize); + scatterwalk_copychunks(addr, &walk->out, bsize, + (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); + return 0; +} + +int skcipher_walk_done(struct skcipher_walk *walk, int err) +{ + unsigned int n = walk->nbytes - err; + unsigned int nbytes; + + nbytes = walk->total - n; + + if (unlikely(err < 0)) { + nbytes = 0; + n = 0; + } else if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | + SKCIPHER_WALK_SLOW | + SKCIPHER_WALK_COPY | + SKCIPHER_WALK_DIFF)))) { +unmap_src: + skcipher_unmap_src(walk); + } else if (walk->flags & SKCIPHER_WALK_DIFF) { + skcipher_unmap_dst(walk); + goto unmap_src; + } else if (walk->flags & SKCIPHER_WALK_COPY) { + skcipher_map_dst(walk); + memcpy(walk->dst.virt.addr, walk->page, n); + skcipher_unmap_dst(walk); + } else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) { + if (WARN_ON(err)) { + err = -EINVAL; + nbytes = 0; + } else + n = skcipher_done_slow(walk, n); + } + + if (err > 0) + err = 0; + + walk->total = nbytes; + walk->nbytes = nbytes; + + scatterwalk_advance(&walk->in, n); + scatterwalk_advance(&walk->out, n); + scatterwalk_done(&walk->in, 0, nbytes); + scatterwalk_done(&walk->out, 1, nbytes); + + if (nbytes) { + crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ? + CRYPTO_TFM_REQ_MAY_SLEEP : 0); + return skcipher_walk_next(walk); + } + + /* Short-circuit for the common/fast path. 
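skcipher_walk_done() above pairs with skcipher_walk_virt() in the loop that algorithm implementations run; the pcbc.c conversion earlier in this patch follows the same shape. A minimal sketch with a placeholder byte transform (illustrative only, not a real algorithm):

#include <crypto/internal/skcipher.h>

static int toy_skcipher_crypt(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;
		unsigned int n = nbytes;

		while (n--)
			*dst++ = *src++ ^ 0x5a;	/* placeholder transform */

		/* second argument is how much of this chunk was left unprocessed */
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}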
*/ + if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) + goto out; + + if (walk->flags & SKCIPHER_WALK_PHYS) + goto out; + + if (walk->iv != walk->oiv) + memcpy(walk->oiv, walk->iv, walk->ivsize); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); + +out: + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_done); + +void skcipher_walk_complete(struct skcipher_walk *walk, int err) +{ + struct skcipher_walk_buffer *p, *tmp; + + list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { + u8 *data; + + if (err) + goto done; + + data = p->data; + if (!data) { + data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); + data = skcipher_get_spot(data, walk->chunksize); + } + + scatterwalk_copychunks(data, &p->dst, p->len, 1); + + if (offset_in_page(p->data) + p->len + walk->chunksize > + PAGE_SIZE) + free_page((unsigned long)p->data); + +done: + list_del(&p->entry); + kfree(p); + } + + if (!err && walk->iv != walk->oiv) + memcpy(walk->oiv, walk->iv, walk->ivsize); + if (walk->buffer != walk->page) + kfree(walk->buffer); + if (walk->page) + free_page((unsigned long)walk->page); +} +EXPORT_SYMBOL_GPL(skcipher_walk_complete); + +static void skcipher_queue_write(struct skcipher_walk *walk, + struct skcipher_walk_buffer *p) +{ + p->dst = walk->out; + list_add_tail(&p->entry, &walk->buffers); +} + +static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize) +{ + bool phys = walk->flags & SKCIPHER_WALK_PHYS; + unsigned alignmask = walk->alignmask; + struct skcipher_walk_buffer *p; + unsigned a; + unsigned n; + u8 *buffer; + void *v; + + if (!phys) { + buffer = walk->buffer ?: walk->page; + if (buffer) + goto ok; + } + + /* Start with the minimum alignment of kmalloc. */ + a = crypto_tfm_ctx_alignment() - 1; + n = bsize; + + if (phys) { + /* Calculate the minimum alignment of p->buffer. */ + a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1; + n += sizeof(*p); + } + + /* Minimum size to align p->buffer by alignmask. */ + n += alignmask & ~a; + + /* Minimum size to ensure p->buffer does not straddle a page. 
*/ + n += (bsize - 1) & ~(alignmask | a); + + v = kzalloc(n, skcipher_walk_gfp(walk)); + if (!v) + return skcipher_walk_done(walk, -ENOMEM); + + if (phys) { + p = v; + p->len = bsize; + skcipher_queue_write(walk, p); + buffer = p->buffer; + } else { + walk->buffer = v; + buffer = v; + } + +ok: + walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1); + walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize); + walk->src.virt.addr = walk->dst.virt.addr; + + scatterwalk_copychunks(walk->src.virt.addr, &walk->in, bsize, 0); + + walk->nbytes = bsize; + walk->flags |= SKCIPHER_WALK_SLOW; + + return 0; +} + +static int skcipher_next_copy(struct skcipher_walk *walk) +{ + struct skcipher_walk_buffer *p; + u8 *tmp = walk->page; + + skcipher_map_src(walk); + memcpy(tmp, walk->src.virt.addr, walk->nbytes); + skcipher_unmap_src(walk); + + walk->src.virt.addr = tmp; + walk->dst.virt.addr = tmp; + + if (!(walk->flags & SKCIPHER_WALK_PHYS)) + return 0; + + p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk)); + if (!p) + return -ENOMEM; + + p->data = walk->page; + p->len = walk->nbytes; + skcipher_queue_write(walk, p); + + if (offset_in_page(walk->page) + walk->nbytes + walk->chunksize > + PAGE_SIZE) + walk->page = NULL; + else + walk->page += walk->nbytes; + + return 0; +} + +static int skcipher_next_fast(struct skcipher_walk *walk) +{ + unsigned long diff; + + walk->src.phys.page = scatterwalk_page(&walk->in); + walk->src.phys.offset = offset_in_page(walk->in.offset); + walk->dst.phys.page = scatterwalk_page(&walk->out); + walk->dst.phys.offset = offset_in_page(walk->out.offset); + + if (walk->flags & SKCIPHER_WALK_PHYS) + return 0; + + diff = walk->src.phys.offset - walk->dst.phys.offset; + diff |= walk->src.virt.page - walk->dst.virt.page; + + skcipher_map_src(walk); + walk->dst.virt.addr = walk->src.virt.addr; + + if (diff) { + walk->flags |= SKCIPHER_WALK_DIFF; + skcipher_map_dst(walk); + } + + return 0; +} + +static int skcipher_walk_next(struct skcipher_walk *walk) +{ + unsigned int bsize; + unsigned int n; + int err; + + walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | + SKCIPHER_WALK_DIFF); + + n = walk->total; + bsize = min(walk->chunksize, max(n, walk->blocksize)); + n = scatterwalk_clamp(&walk->in, n); + n = scatterwalk_clamp(&walk->out, n); + + if (unlikely(n < bsize)) { + if (unlikely(walk->total < walk->blocksize)) + return skcipher_walk_done(walk, -EINVAL); + +slow_path: + err = skcipher_next_slow(walk, bsize); + goto set_phys_lowmem; + } + + if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) { + if (!walk->page) { + gfp_t gfp = skcipher_walk_gfp(walk); + + walk->page = (void *)__get_free_page(gfp); + if (!walk->page) + goto slow_path; + } + + walk->nbytes = min_t(unsigned, n, + PAGE_SIZE - offset_in_page(walk->page)); + walk->flags |= SKCIPHER_WALK_COPY; + err = skcipher_next_copy(walk); + goto set_phys_lowmem; + } + + walk->nbytes = n; + + return skcipher_next_fast(walk); + +set_phys_lowmem: + if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) { + walk->src.phys.page = virt_to_page(walk->src.virt.addr); + walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); + walk->src.phys.offset &= PAGE_SIZE - 1; + walk->dst.phys.offset &= PAGE_SIZE - 1; + } + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_next); + +static int skcipher_copy_iv(struct skcipher_walk *walk) +{ + unsigned a = crypto_tfm_ctx_alignment() - 1; + unsigned alignmask = walk->alignmask; + unsigned ivsize = walk->ivsize; + unsigned bs = walk->chunksize; + unsigned aligned_bs; + unsigned 
size; + u8 *iv; + + aligned_bs = ALIGN(bs, alignmask); + + /* Minimum size to align buffer by alignmask. */ + size = alignmask & ~a; + + if (walk->flags & SKCIPHER_WALK_PHYS) + size += ivsize; + else { + size += aligned_bs + ivsize; + + /* Minimum size to ensure buffer does not straddle a page. */ + size += (bs - 1) & ~(alignmask | a); + } + + walk->buffer = kmalloc(size, skcipher_walk_gfp(walk)); + if (!walk->buffer) + return -ENOMEM; + + iv = PTR_ALIGN(walk->buffer, alignmask + 1); + iv = skcipher_get_spot(iv, bs) + aligned_bs; + + walk->iv = memcpy(iv, walk->iv, walk->ivsize); + return 0; +} + +static int skcipher_walk_first(struct skcipher_walk *walk) +{ + walk->nbytes = 0; + + if (WARN_ON_ONCE(in_irq())) + return -EDEADLK; + + if (unlikely(!walk->total)) + return 0; + + walk->buffer = NULL; + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { + int err = skcipher_copy_iv(walk); + if (err) + return err; + } + + walk->page = NULL; + walk->nbytes = walk->total; + + return skcipher_walk_next(walk); +} + +static int skcipher_walk_skcipher(struct skcipher_walk *walk, + struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + + scatterwalk_start(&walk->in, req->src); + scatterwalk_start(&walk->out, req->dst); + + walk->total = req->cryptlen; + walk->iv = req->iv; + walk->oiv = req->iv; + + walk->flags &= ~SKCIPHER_WALK_SLEEP; + walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + SKCIPHER_WALK_SLEEP : 0; + + walk->blocksize = crypto_skcipher_blocksize(tfm); + walk->chunksize = crypto_skcipher_chunksize(tfm); + walk->ivsize = crypto_skcipher_ivsize(tfm); + walk->alignmask = crypto_skcipher_alignmask(tfm); + + return skcipher_walk_first(walk); +} + +int skcipher_walk_virt(struct skcipher_walk *walk, + struct skcipher_request *req, bool atomic) +{ + int err; + + walk->flags &= ~SKCIPHER_WALK_PHYS; + + err = skcipher_walk_skcipher(walk, req); + + walk->flags &= atomic ? 
~SKCIPHER_WALK_SLEEP : ~0; + + return err; +} +EXPORT_SYMBOL_GPL(skcipher_walk_virt); + +void skcipher_walk_atomise(struct skcipher_walk *walk) +{ + walk->flags &= ~SKCIPHER_WALK_SLEEP; +} +EXPORT_SYMBOL_GPL(skcipher_walk_atomise); + +int skcipher_walk_async(struct skcipher_walk *walk, + struct skcipher_request *req) +{ + walk->flags |= SKCIPHER_WALK_PHYS; + + INIT_LIST_HEAD(&walk->buffers); + + return skcipher_walk_skcipher(walk, req); +} +EXPORT_SYMBOL_GPL(skcipher_walk_async); + +static int skcipher_walk_aead_common(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + int err; + + walk->flags &= ~SKCIPHER_WALK_PHYS; + + scatterwalk_start(&walk->in, req->src); + scatterwalk_start(&walk->out, req->dst); + + scatterwalk_copychunks(NULL, &walk->in, req->assoclen, 2); + scatterwalk_copychunks(NULL, &walk->out, req->assoclen, 2); + + walk->iv = req->iv; + walk->oiv = req->iv; + + if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) + walk->flags |= SKCIPHER_WALK_SLEEP; + else + walk->flags &= ~SKCIPHER_WALK_SLEEP; + + walk->blocksize = crypto_aead_blocksize(tfm); + walk->chunksize = crypto_aead_chunksize(tfm); + walk->ivsize = crypto_aead_ivsize(tfm); + walk->alignmask = crypto_aead_alignmask(tfm); + + err = skcipher_walk_first(walk); + + if (atomic) + walk->flags &= ~SKCIPHER_WALK_SLEEP; + + return err; +} + +int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req, + bool atomic) +{ + walk->total = req->cryptlen; + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead); + +int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + walk->total = req->cryptlen; + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead_encrypt); + +int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + + walk->total = req->cryptlen - crypto_aead_authsize(tfm); + + return skcipher_walk_aead_common(walk, req, atomic); +} +EXPORT_SYMBOL_GPL(skcipher_walk_aead_decrypt); + static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg) { if (alg->cra_type == &crypto_blkcipher_type) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 62dffa0..f616ad7 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -33,6 +33,7 @@ #include <crypto/drbg.h> #include <crypto/akcipher.h> #include <crypto/kpp.h> +#include <crypto/acompress.h> #include "internal.h" @@ -62,7 +63,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask) */ #define IDX1 32 #define IDX2 32400 -#define IDX3 1 +#define IDX3 1511 #define IDX4 8193 #define IDX5 22222 #define IDX6 17101 @@ -1442,6 +1443,126 @@ out: return ret; } +static int test_acomp(struct crypto_acomp *tfm, struct comp_testvec *ctemplate, + struct comp_testvec *dtemplate, int ctcount, int dtcount) +{ + const char *algo = crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)); + unsigned int i; + char *output; + int ret; + struct scatterlist src, dst; + struct acomp_req *req; + struct tcrypt_result result; + + output = kmalloc(COMP_BUF_SIZE, GFP_KERNEL); + if (!output) + return -ENOMEM; + + for (i = 0; i < ctcount; i++) { + unsigned int dlen = COMP_BUF_SIZE; + int ilen = ctemplate[i].inlen; + + memset(output, 0, dlen); + init_completion(&result.completion); + sg_init_one(&src, ctemplate[i].input, ilen); + sg_init_one(&dst, output, dlen); + + req = 
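
The walk interface above is what cipher implementations are expected to drive once they are converted to the skcipher request type. A minimal sketch of a synchronous driver loop is given below; the do_blocks() helper and the surrounding encrypt handler are illustrative assumptions, not code from this series:

    #include <crypto/internal/skcipher.h>

    /* Hypothetical cipher core: consumes whole blocks from src into dst. */
    static void do_blocks(u8 *dst, const u8 *src, u8 *iv, unsigned int nbytes);

    static int example_encrypt(struct skcipher_request *req)
    {
            struct skcipher_walk walk;
            int err;

            /* atomic == false: the walk may sleep if the request flags allow it. */
            err = skcipher_walk_virt(&walk, req, false);

            while (walk.nbytes) {
                    unsigned int nbytes = walk.nbytes;

                    do_blocks(walk.dst.virt.addr, walk.src.virt.addr,
                              walk.iv, nbytes - nbytes % walk.blocksize);

                    /* Report back whatever was not consumed. */
                    err = skcipher_walk_done(&walk, nbytes % walk.blocksize);
            }

            return err;
    }
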
acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + ret = -ENOMEM; + goto out; + } + + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + + ret = wait_async_op(&result, crypto_acomp_compress(req)); + if (ret) { + pr_err("alg: acomp: compression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + acomp_request_free(req); + goto out; + } + + if (req->dlen != ctemplate[i].outlen) { + pr_err("alg: acomp: Compression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + acomp_request_free(req); + goto out; + } + + if (memcmp(output, ctemplate[i].output, req->dlen)) { + pr_err("alg: acomp: Compression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + acomp_request_free(req); + goto out; + } + + acomp_request_free(req); + } + + for (i = 0; i < dtcount; i++) { + unsigned int dlen = COMP_BUF_SIZE; + int ilen = dtemplate[i].inlen; + + memset(output, 0, dlen); + init_completion(&result.completion); + sg_init_one(&src, dtemplate[i].input, ilen); + sg_init_one(&dst, output, dlen); + + req = acomp_request_alloc(tfm); + if (!req) { + pr_err("alg: acomp: request alloc failed for %s\n", + algo); + ret = -ENOMEM; + goto out; + } + + acomp_request_set_params(req, &src, &dst, ilen, dlen); + acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + tcrypt_complete, &result); + + ret = wait_async_op(&result, crypto_acomp_decompress(req)); + if (ret) { + pr_err("alg: acomp: decompression failed on test %d for %s: ret=%d\n", + i + 1, algo, -ret); + acomp_request_free(req); + goto out; + } + + if (req->dlen != dtemplate[i].outlen) { + pr_err("alg: acomp: Decompression test %d failed for %s: output len = %d\n", + i + 1, algo, req->dlen); + ret = -EINVAL; + acomp_request_free(req); + goto out; + } + + if (memcmp(output, dtemplate[i].output, req->dlen)) { + pr_err("alg: acomp: Decompression test %d failed for %s\n", + i + 1, algo); + hexdump(output, req->dlen); + ret = -EINVAL; + acomp_request_free(req); + goto out; + } + + acomp_request_free(req); + } + + ret = 0; + +out: + kfree(output); + return ret; +} + static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template, unsigned int tcount) { @@ -1509,7 +1630,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver, struct crypto_aead *tfm; int err = 0; - tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_aead(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: aead: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1538,7 +1659,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc, struct crypto_cipher *tfm; int err = 0; - tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_cipher(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: cipher: Failed to load transform for " "%s: %ld\n", driver, PTR_ERR(tfm)); @@ -1567,7 +1688,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc, struct crypto_skcipher *tfm; int err = 0; - tfm = crypto_alloc_skcipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_skcipher(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: skcipher: Failed to load transform for " "%s: %ld\n", driver, PTR_ERR(tfm)); @@ -1593,22 +1714,38 @@ out: static int alg_test_comp(const struct alg_test_desc *desc, const char 
*driver, u32 type, u32 mask) { - struct crypto_comp *tfm; + struct crypto_comp *comp; + struct crypto_acomp *acomp; int err; + u32 algo_type = type & CRYPTO_ALG_TYPE_ACOMPRESS_MASK; + + if (algo_type == CRYPTO_ALG_TYPE_ACOMPRESS) { + acomp = crypto_alloc_acomp(driver, type, mask); + if (IS_ERR(acomp)) { + pr_err("alg: acomp: Failed to load transform for %s: %ld\n", + driver, PTR_ERR(acomp)); + return PTR_ERR(acomp); + } + err = test_acomp(acomp, desc->suite.comp.comp.vecs, + desc->suite.comp.decomp.vecs, + desc->suite.comp.comp.count, + desc->suite.comp.decomp.count); + crypto_free_acomp(acomp); + } else { + comp = crypto_alloc_comp(driver, type, mask); + if (IS_ERR(comp)) { + pr_err("alg: comp: Failed to load transform for %s: %ld\n", + driver, PTR_ERR(comp)); + return PTR_ERR(comp); + } - tfm = crypto_alloc_comp(driver, type, mask); - if (IS_ERR(tfm)) { - printk(KERN_ERR "alg: comp: Failed to load transform for %s: " - "%ld\n", driver, PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - - err = test_comp(tfm, desc->suite.comp.comp.vecs, - desc->suite.comp.decomp.vecs, - desc->suite.comp.comp.count, - desc->suite.comp.decomp.count); + err = test_comp(comp, desc->suite.comp.comp.vecs, + desc->suite.comp.decomp.vecs, + desc->suite.comp.comp.count, + desc->suite.comp.decomp.count); - crypto_free_comp(tfm); + crypto_free_comp(comp); + } return err; } @@ -1618,7 +1755,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver, struct crypto_ahash *tfm; int err; - tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_ahash(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: hash: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1646,7 +1783,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc, if (err) goto out; - tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_shash(driver, type, mask); if (IS_ERR(tfm)) { printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(tfm)); @@ -1688,7 +1825,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver, struct crypto_rng *rng; int err; - rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); + rng = crypto_alloc_rng(driver, type, mask); if (IS_ERR(rng)) { printk(KERN_ERR "alg: cprng: Failed to load transform for %s: " "%ld\n", driver, PTR_ERR(rng)); @@ -1715,7 +1852,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr, if (!buf) return -ENOMEM; - drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask); + drng = crypto_alloc_rng(driver, type, mask); if (IS_ERR(drng)) { printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for " "%s\n", driver); @@ -1909,7 +2046,7 @@ static int alg_test_kpp(const struct alg_test_desc *desc, const char *driver, struct crypto_kpp *tfm; int err = 0; - tfm = crypto_alloc_kpp(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_kpp(driver, type, mask); if (IS_ERR(tfm)) { pr_err("alg: kpp: Failed to load tfm for %s: %ld\n", driver, PTR_ERR(tfm)); @@ -2068,7 +2205,7 @@ static int alg_test_akcipher(const struct alg_test_desc *desc, struct crypto_akcipher *tfm; int err = 0; - tfm = crypto_alloc_akcipher(driver, type | CRYPTO_ALG_INTERNAL, mask); + tfm = crypto_alloc_akcipher(driver, type, mask); if (IS_ERR(tfm)) { pr_err("alg: akcipher: Failed to load tfm for %s: %ld\n", driver, PTR_ERR(tfm)); @@ -2091,88 +2228,6 @@ static int alg_test_null(const struct alg_test_desc *desc, /* Please keep 
this list sorted by algorithm name. */ static const struct alg_test_desc alg_test_descs[] = { { - .alg = "__cbc-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__cbc-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__cbc-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__driver-cbc-camellia-aesni", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-camellia-aesni-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__driver-cbc-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__driver-ecb-camellia-aesni", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-camellia-aesni-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-cast5-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-cast6-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-avx", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-avx2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-serpent-sse2", - .test = alg_test_null, - }, { - .alg = "__driver-ecb-twofish-avx", - .test = alg_test_null, - }, { - .alg = "__driver-gcm-aes-aesni", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "__ghash-pclmulqdqni", - .test = alg_test_null, - .fips_allowed = 1, - }, { .alg = "ansi_cprng", .test = alg_test_cprng, .suite = { @@ -2659,55 +2714,6 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { - .alg = "cryptd(__driver-cbc-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__driver-cbc-camellia-aesni)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-cbc-camellia-aesni-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-cbc-serpent-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__driver-ecb-camellia-aesni)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-camellia-aesni-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-cast5-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-cast6-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-avx2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-serpent-sse2)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-ecb-twofish-avx)", - .test = alg_test_null, - }, { - .alg = "cryptd(__driver-gcm-aes-aesni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { - .alg = "cryptd(__ghash-pclmulqdqni)", - .test = alg_test_null, - .fips_allowed = 1, - }, { .alg = "ctr(aes)", .test = alg_test_skcipher, .fips_allowed = 1, @@ -3034,10 +3040,6 @@ static const struct alg_test_desc alg_test_descs[] = { .fips_allowed = 1, .test = alg_test_null, }, { - .alg = "ecb(__aes-aesni)", - 
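
The acomp interface exercised by test_acomp() earlier in this file is used the same way by any other caller. A hedged sketch follows; the algorithm name, buffers and the simplified completion handling are illustrative assumptions:

    #include <crypto/acompress.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    static int example_compress(void *in, unsigned int ilen,
                                void *out, unsigned int olen)
    {
            struct crypto_acomp *tfm;
            struct acomp_req *req;
            struct scatterlist src, dst;
            int err;

            tfm = crypto_alloc_acomp("deflate", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            sg_init_one(&src, in, ilen);
            sg_init_one(&dst, out, olen);

            req = acomp_request_alloc(tfm);
            if (!req) {
                    err = -ENOMEM;
                    goto out_free_tfm;
            }

            acomp_request_set_params(req, &src, &dst, ilen, olen);
            acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, NULL, NULL);

            err = crypto_acomp_compress(req);
            /* Async backends may return -EINPROGRESS/-EBUSY; a real caller
             * waits for the request callback, as the test code above does. */

            acomp_request_free(req);
    out_free_tfm:
            crypto_free_acomp(tfm);
            return err;
    }
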
.test = alg_test_null, - .fips_allowed = 1, - }, { .alg = "ecb(aes)", .test = alg_test_skcipher, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index e64a4ef..9b656be 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -1334,36 +1334,50 @@ static struct hash_testvec rmd320_tv_template[] = { } }; -#define CRCT10DIF_TEST_VECTORS 3 +#define CRCT10DIF_TEST_VECTORS ARRAY_SIZE(crct10dif_tv_template) static struct hash_testvec crct10dif_tv_template[] = { { - .plaintext = "abc", - .psize = 3, -#ifdef __LITTLE_ENDIAN - .digest = "\x3b\x44", -#else - .digest = "\x44\x3b", -#endif - }, { - .plaintext = "1234567890123456789012345678901234567890" - "123456789012345678901234567890123456789", - .psize = 79, -#ifdef __LITTLE_ENDIAN - .digest = "\x70\x4b", -#else - .digest = "\x4b\x70", -#endif - }, { - .plaintext = - "abcddddddddddddddddddddddddddddddddddddddddddddddddddddd", - .psize = 56, -#ifdef __LITTLE_ENDIAN - .digest = "\xe3\x9c", -#else - .digest = "\x9c\xe3", -#endif - .np = 2, - .tap = { 28, 28 } + .plaintext = "abc", + .psize = 3, + .digest = (u8 *)(u16 []){ 0x443b }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 79, + .digest = (u8 *)(u16 []){ 0x4b70 }, + .np = 2, + .tap = { 63, 16 }, + }, { + .plaintext = "abcdddddddddddddddddddddddddddddddddddddddd" + "ddddddddddddd", + .psize = 56, + .digest = (u8 *)(u16 []){ 0x9ce3 }, + .np = 8, + .tap = { 1, 2, 28, 7, 6, 5, 4, 3 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 319, + .digest = (u8 *)(u16 []){ 0x44c6 }, + }, { + .plaintext = "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "1234567890123456789012345678901234567890" + "123456789012345678901234567890123456789", + .psize = 319, + .digest = (u8 *)(u16 []){ 0x44c6 }, + .np = 4, + .tap = { 1, 255, 57, 6 }, } }; diff --git a/crypto/xts.c b/crypto/xts.c index 305343f..410a2e2 100644 --- a/crypto/xts.c +++ b/crypto/xts.c @@ -13,7 +13,8 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. 
*/ -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -25,140 +26,320 @@ #include <crypto/b128ops.h> #include <crypto/gf128mul.h> +#define XTS_BUFFER_SIZE 128u + struct priv { - struct crypto_cipher *child; + struct crypto_skcipher *child; struct crypto_cipher *tweak; }; -static int setkey(struct crypto_tfm *parent, const u8 *key, +struct xts_instance_ctx { + struct crypto_skcipher_spawn spawn; + char name[CRYPTO_MAX_ALG_NAME]; +}; + +struct rctx { + be128 buf[XTS_BUFFER_SIZE / sizeof(be128)]; + + be128 t; + + be128 *ext; + + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct scatterlist *src; + struct scatterlist *dst; + + unsigned int left; + + struct skcipher_request subreq; +}; + +static int setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { - struct priv *ctx = crypto_tfm_ctx(parent); - struct crypto_cipher *child = ctx->tweak; + struct priv *ctx = crypto_skcipher_ctx(parent); + struct crypto_skcipher *child; + struct crypto_cipher *tweak; int err; - err = xts_check_key(parent, key, keylen); + err = xts_verify_key(parent, key, keylen); if (err) return err; + keylen /= 2; + /* we need two cipher instances: one to compute the initial 'tweak' * by encrypting the IV (usually the 'plain' iv) and the other * one to encrypt and decrypt the data */ /* tweak cipher, uses Key2 i.e. the second half of *key */ - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & + tweak = ctx->tweak; + crypto_cipher_clear_flags(tweak, CRYPTO_TFM_REQ_MASK); + crypto_cipher_set_flags(tweak, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key + keylen/2, keylen/2); + err = crypto_cipher_setkey(tweak, key + keylen, keylen); + crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(tweak) & + CRYPTO_TFM_RES_MASK); if (err) return err; - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - + /* data cipher, uses Key1 i.e. the first half of *key */ child = ctx->child; + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + err = crypto_skcipher_setkey(child, key, keylen); + crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & + CRYPTO_TFM_RES_MASK); - /* data cipher, uses Key1 i.e. 
the first half of *key */ - crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_cipher_set_flags(child, crypto_tfm_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_cipher_setkey(child, key, keylen/2); - if (err) - return err; + return err; +} - crypto_tfm_set_flags(parent, crypto_cipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); +static int post_crypt(struct skcipher_request *req) +{ + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; + const int bs = XTS_BLOCK_SIZE; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned offset; + int err; - return 0; -} + subreq = &rctx->subreq; + err = skcipher_walk_virt(&w, subreq, false); -struct sinfo { - be128 *t; - struct crypto_tfm *tfm; - void (*fn)(struct crypto_tfm *, u8 *, const u8 *); -}; + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wdst; -static inline void xts_round(struct sinfo *s, void *dst, const void *src) -{ - be128_xor(dst, s->t, src); /* PP <- T xor P */ - s->fn(s->tfm, dst, dst); /* CC <- E(Key1,PP) */ - be128_xor(dst, dst, s->t); /* C <- T xor CC */ + wdst = w.dst.virt.addr; + + do { + be128_xor(wdst, buf++, wdst); + wdst++; + } while ((avail -= bs) >= bs); + + err = skcipher_walk_done(&w, avail); + } + + rctx->left -= subreq->cryptlen; + + if (err || !rctx->left) + goto out; + + rctx->dst = rctx->dstbuf; + + scatterwalk_done(&w.out, 0, 1); + sg = w.out.sg; + offset = w.out.offset; + + if (rctx->dst != sg) { + rctx->dst[0] = *sg; + sg_unmark_end(rctx->dst); + scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 0, 2); + } + rctx->dst[0].length -= offset - sg->offset; + rctx->dst[0].offset = offset; + +out: + return err; } -static int crypt(struct blkcipher_desc *d, - struct blkcipher_walk *w, struct priv *ctx, - void (*tw)(struct crypto_tfm *, u8 *, const u8 *), - void (*fn)(struct crypto_tfm *, u8 *, const u8 *)) +static int pre_crypt(struct skcipher_request *req) { - int err; - unsigned int avail; + struct rctx *rctx = skcipher_request_ctx(req); + be128 *buf = rctx->ext ?: rctx->buf; + struct skcipher_request *subreq; const int bs = XTS_BLOCK_SIZE; - struct sinfo s = { - .tfm = crypto_cipher_tfm(ctx->child), - .fn = fn - }; - u8 *wsrc; - u8 *wdst; - - err = blkcipher_walk_virt(d, w); - if (!w->nbytes) - return err; + struct skcipher_walk w; + struct scatterlist *sg; + unsigned cryptlen; + unsigned offset; + bool more; + int err; - s.t = (be128 *)w->iv; - avail = w->nbytes; + subreq = &rctx->subreq; + cryptlen = subreq->cryptlen; - wsrc = w->src.virt.addr; - wdst = w->dst.virt.addr; + more = rctx->left > cryptlen; + if (!more) + cryptlen = rctx->left; - /* calculate first value of T */ - tw(crypto_cipher_tfm(ctx->tweak), w->iv, w->iv); + skcipher_request_set_crypt(subreq, rctx->src, rctx->dst, + cryptlen, NULL); - goto first; + err = skcipher_walk_virt(&w, subreq, false); - for (;;) { - do { - gf128mul_x_ble(s.t, s.t); + while (w.nbytes) { + unsigned int avail = w.nbytes; + be128 *wsrc; + be128 *wdst; -first: - xts_round(&s, wdst, wsrc); + wsrc = w.src.virt.addr; + wdst = w.dst.virt.addr; - wsrc += bs; - wdst += bs; + do { + *buf++ = rctx->t; + be128_xor(wdst++, &rctx->t, wsrc++); + gf128mul_x_ble(&rctx->t, &rctx->t); } while ((avail -= bs) >= bs); - err = blkcipher_walk_done(d, w, avail); - if (!w->nbytes) - break; + err = skcipher_walk_done(&w, avail); + } + + skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst, + cryptlen, NULL); - avail = w->nbytes; + if (err || !more) + goto out; - wsrc = w->src.virt.addr; - 
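
The per-block XTS operation itself is unchanged by the conversion; what the removed xts_round() expressed with its inline comments can be restated as the following sketch, where data_tfm is the Key1 cipher handle and t carries the running tweak (initially E_K2(IV)); the names are illustrative:

    #include <crypto/b128ops.h>
    #include <crypto/gf128mul.h>
    #include <linux/crypto.h>

    static void example_xts_block(struct crypto_cipher *data_tfm,
                                  be128 *block, be128 *t)
    {
            be128_xor(block, t, block);                     /* PP = P xor T  */
            crypto_cipher_encrypt_one(data_tfm, (u8 *)block,
                                      (u8 *)block);         /* CC = E_K1(PP) */
            be128_xor(block, block, t);                     /* C  = CC xor T */
            gf128mul_x_ble(t, t);                  /* next tweak: T = T * x  */
    }
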
wdst = w->dst.virt.addr; + rctx->src = rctx->srcbuf; + + scatterwalk_done(&w.in, 0, 1); + sg = w.in.sg; + offset = w.in.offset; + + if (rctx->src != sg) { + rctx->src[0] = *sg; + sg_unmark_end(rctx->src); + scatterwalk_crypto_chain(rctx->src, sg_next(sg), 0, 2); } + rctx->src[0].length -= offset - sg->offset; + rctx->src[0].offset = offset; +out: return err; } -static int encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int init_crypt(struct skcipher_request *req, crypto_completion_t done) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + gfp_t gfp; + + subreq = &rctx->subreq; + skcipher_request_set_tfm(subreq, ctx->child); + skcipher_request_set_callback(subreq, req->base.flags, done, req); + + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + rctx->ext = NULL; + + subreq->cryptlen = XTS_BUFFER_SIZE; + if (req->cryptlen > XTS_BUFFER_SIZE) { + subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE); + rctx->ext = kmalloc(subreq->cryptlen, gfp); + } + + rctx->src = req->src; + rctx->dst = req->dst; + rctx->left = req->cryptlen; - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt, - crypto_cipher_alg(ctx->child)->cia_encrypt); + /* calculate first value of T */ + crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv); + + return 0; } -static int decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static void exit_crypt(struct skcipher_request *req) { - struct priv *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk w; + struct rctx *rctx = skcipher_request_ctx(req); + + rctx->left = 0; - blkcipher_walk_init(&w, dst, src, nbytes); - return crypt(desc, &w, ctx, crypto_cipher_alg(ctx->tweak)->cia_encrypt, - crypto_cipher_alg(ctx->child)->cia_decrypt); + if (rctx->ext) + kzfree(rctx->ext); +} + +static int do_encrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_encrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + exit_crypt(req); + return err; +} + +static void encrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_encrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int encrypt(struct skcipher_request *req) +{ + return do_encrypt(req, init_crypt(req, encrypt_done)); +} + +static int do_decrypt(struct skcipher_request *req, int err) +{ + struct rctx *rctx = skcipher_request_ctx(req); + struct skcipher_request *subreq; + + subreq = &rctx->subreq; + + while (!err && rctx->left) { + err = pre_crypt(req) ?: + crypto_skcipher_decrypt(subreq) ?: + post_crypt(req); + + if (err == -EINPROGRESS || + (err == -EBUSY && + req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return err; + } + + 
exit_crypt(req); + return err; +} + +static void decrypt_done(struct crypto_async_request *areq, int err) +{ + struct skcipher_request *req = areq->data; + struct skcipher_request *subreq; + struct rctx *rctx; + + rctx = skcipher_request_ctx(req); + subreq = &rctx->subreq; + subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; + + err = do_decrypt(req, err ?: post_crypt(req)); + if (rctx->left) + return; + + skcipher_request_complete(req, err); +} + +static int decrypt(struct skcipher_request *req) +{ + return do_decrypt(req, init_crypt(req, decrypt_done)); } int xts_crypt(struct blkcipher_desc *desc, struct scatterlist *sdst, @@ -233,112 +414,168 @@ first: } EXPORT_SYMBOL_GPL(xts_crypt); -static int init_tfm(struct crypto_tfm *tfm) +static int init_tfm(struct crypto_skcipher *tfm) { - struct crypto_cipher *cipher; - struct crypto_instance *inst = (void *)tfm->__crt_alg; - struct crypto_spawn *spawn = crypto_instance_ctx(inst); - struct priv *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - - cipher = crypto_spawn_cipher(spawn); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); - - if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) { - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - crypto_free_cipher(cipher); - return -EINVAL; - } + struct skcipher_instance *inst = skcipher_alg_instance(tfm); + struct xts_instance_ctx *ictx = skcipher_instance_ctx(inst); + struct priv *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *child; + struct crypto_cipher *tweak; - ctx->child = cipher; + child = crypto_spawn_skcipher(&ictx->spawn); + if (IS_ERR(child)) + return PTR_ERR(child); - cipher = crypto_spawn_cipher(spawn); - if (IS_ERR(cipher)) { - crypto_free_cipher(ctx->child); - return PTR_ERR(cipher); - } + ctx->child = child; - /* this check isn't really needed, leave it here just in case */ - if (crypto_cipher_blocksize(cipher) != XTS_BLOCK_SIZE) { - crypto_free_cipher(cipher); - crypto_free_cipher(ctx->child); - *flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; - return -EINVAL; + tweak = crypto_alloc_cipher(ictx->name, 0, 0); + if (IS_ERR(tweak)) { + crypto_free_skcipher(ctx->child); + return PTR_ERR(tweak); } - ctx->tweak = cipher; + ctx->tweak = tweak; + + crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + + sizeof(struct rctx)); return 0; } -static void exit_tfm(struct crypto_tfm *tfm) +static void exit_tfm(struct crypto_skcipher *tfm) { - struct priv *ctx = crypto_tfm_ctx(tfm); - crypto_free_cipher(ctx->child); + struct priv *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->child); crypto_free_cipher(ctx->tweak); } -static struct crypto_instance *alloc(struct rtattr **tb) +static void free(struct skcipher_instance *inst) +{ + crypto_drop_skcipher(skcipher_instance_ctx(inst)); + kfree(inst); +} + +static int create(struct crypto_template *tmpl, struct rtattr **tb) { - struct crypto_instance *inst; - struct crypto_alg *alg; + struct skcipher_instance *inst; + struct crypto_attr_type *algt; + struct xts_instance_ctx *ctx; + struct skcipher_alg *alg; + const char *cipher_name; int err; - err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); + algt = crypto_get_attr_type(tb); + if (IS_ERR(algt)) + return PTR_ERR(algt); + + if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask) + return -EINVAL; + + cipher_name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(cipher_name)) + return PTR_ERR(cipher_name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; + + ctx = skcipher_instance_ctx(inst); + + 
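
Because the template now produces skcipher instances, consumers allocate and drive "xts(aes)" through the skcipher API rather than the old blkcipher one. A minimal hedged sketch follows; the buffer, key and completion handling are illustrative, and keylen is twice the AES key size, as checked by xts_verify_key():

    #include <crypto/skcipher.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    static int example_xts_encrypt(const u8 *key, unsigned int keylen,
                                   u8 *buf, unsigned int len, u8 *iv)
    {
            struct crypto_skcipher *tfm;
            struct skcipher_request *req;
            struct scatterlist sg;
            int err;

            tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm);

            err = crypto_skcipher_setkey(tfm, key, keylen);
            if (err)
                    goto out_free_tfm;

            req = skcipher_request_alloc(tfm, GFP_KERNEL);
            if (!req) {
                    err = -ENOMEM;
                    goto out_free_tfm;
            }

            sg_init_one(&sg, buf, len);
            skcipher_request_set_callback(req, 0, NULL, NULL);
            skcipher_request_set_crypt(req, &sg, &sg, len, iv);

            err = crypto_skcipher_encrypt(req);
            /* Async implementations may return -EINPROGRESS/-EBUSY; real
             * callers complete through the request callback. */

            skcipher_request_free(req);
    out_free_tfm:
            crypto_free_skcipher(tfm);
            return err;
    }
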
crypto_set_skcipher_spawn(&ctx->spawn, skcipher_crypto_instance(inst)); + err = crypto_grab_skcipher(&ctx->spawn, cipher_name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + if (err == -ENOENT) { + err = -ENAMETOOLONG; + if (snprintf(ctx->name, CRYPTO_MAX_ALG_NAME, "ecb(%s)", + cipher_name) >= CRYPTO_MAX_ALG_NAME) + goto err_free_inst; + + err = crypto_grab_skcipher(&ctx->spawn, ctx->name, 0, + crypto_requires_sync(algt->type, + algt->mask)); + } + if (err) - return ERR_PTR(err); + goto err_free_inst; - alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, - CRYPTO_ALG_TYPE_MASK); - if (IS_ERR(alg)) - return ERR_CAST(alg); + alg = crypto_skcipher_spawn_alg(&ctx->spawn); - inst = crypto_alloc_instance("xts", alg); - if (IS_ERR(inst)) - goto out_put_alg; + err = -EINVAL; + if (alg->base.cra_blocksize != XTS_BLOCK_SIZE) + goto err_drop_spawn; - inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; + if (crypto_skcipher_alg_ivsize(alg)) + goto err_drop_spawn; - if (alg->cra_alignmask < 7) - inst->alg.cra_alignmask = 7; - else - inst->alg.cra_alignmask = alg->cra_alignmask; + err = crypto_inst_setname(skcipher_crypto_instance(inst), "xts", + &alg->base); + if (err) + goto err_drop_spawn; - inst->alg.cra_type = &crypto_blkcipher_type; + err = -EINVAL; + cipher_name = alg->base.cra_name; - inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize; - inst->alg.cra_blkcipher.min_keysize = - 2 * alg->cra_cipher.cia_min_keysize; - inst->alg.cra_blkcipher.max_keysize = - 2 * alg->cra_cipher.cia_max_keysize; + /* Alas we screwed up the naming so we have to mangle the + * cipher name. + */ + if (!strncmp(cipher_name, "ecb(", 4)) { + unsigned len; - inst->alg.cra_ctxsize = sizeof(struct priv); + len = strlcpy(ctx->name, cipher_name + 4, sizeof(ctx->name)); + if (len < 2 || len >= sizeof(ctx->name)) + goto err_drop_spawn; - inst->alg.cra_init = init_tfm; - inst->alg.cra_exit = exit_tfm; + if (ctx->name[len - 1] != ')') + goto err_drop_spawn; - inst->alg.cra_blkcipher.setkey = setkey; - inst->alg.cra_blkcipher.encrypt = encrypt; - inst->alg.cra_blkcipher.decrypt = decrypt; + ctx->name[len - 1] = 0; -out_put_alg: - crypto_mod_put(alg); - return inst; -} + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "xts(%s)", ctx->name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + } else + goto err_drop_spawn; -static void free(struct crypto_instance *inst) -{ - crypto_drop_spawn(crypto_instance_ctx(inst)); + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask | + (__alignof__(u64) - 1); + + inst->alg.ivsize = XTS_BLOCK_SIZE; + inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2; + inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2; + + inst->alg.base.cra_ctxsize = sizeof(struct priv); + + inst->alg.init = init_tfm; + inst->alg.exit = exit_tfm; + + inst->alg.setkey = setkey; + inst->alg.encrypt = encrypt; + inst->alg.decrypt = decrypt; + + inst->free = free; + + err = skcipher_register_instance(tmpl, inst); + if (err) + goto err_drop_spawn; + +out: + return err; + +err_drop_spawn: + crypto_drop_skcipher(&ctx->spawn); +err_free_inst: kfree(inst); + goto out; } static struct crypto_template crypto_tmpl = { .name = "xts", - .alloc = alloc, - .free = free, + .create = create, .module = THIS_MODULE, }; diff --git 
a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 200dab5..ceff2fc 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -168,7 +168,7 @@ config HW_RANDOM_IXP4XX config HW_RANDOM_OMAP tristate "OMAP Random Number Generator support" - depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS + depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU default HW_RANDOM ---help--- This driver provides kernel-side support for the Random Number diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index 0fcc9e6..661c82c 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -48,6 +48,16 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, return 0; } +static void atmel_trng_enable(struct atmel_trng *trng) +{ + writel(TRNG_KEY | 1, trng->base + TRNG_CR); +} + +static void atmel_trng_disable(struct atmel_trng *trng) +{ + writel(TRNG_KEY, trng->base + TRNG_CR); +} + static int atmel_trng_probe(struct platform_device *pdev) { struct atmel_trng *trng; @@ -71,7 +81,7 @@ static int atmel_trng_probe(struct platform_device *pdev) if (ret) return ret; - writel(TRNG_KEY | 1, trng->base + TRNG_CR); + atmel_trng_enable(trng); trng->rng.name = pdev->name; trng->rng.read = atmel_trng_read; @@ -84,7 +94,7 @@ static int atmel_trng_probe(struct platform_device *pdev) return 0; err_register: - clk_disable(trng->clk); + clk_disable_unprepare(trng->clk); return ret; } @@ -94,7 +104,7 @@ static int atmel_trng_remove(struct platform_device *pdev) hwrng_unregister(&trng->rng); - writel(TRNG_KEY, trng->base + TRNG_CR); + atmel_trng_disable(trng); clk_disable_unprepare(trng->clk); return 0; @@ -105,6 +115,7 @@ static int atmel_trng_suspend(struct device *dev) { struct atmel_trng *trng = dev_get_drvdata(dev); + atmel_trng_disable(trng); clk_disable_unprepare(trng->clk); return 0; @@ -113,8 +124,15 @@ static int atmel_trng_suspend(struct device *dev) static int atmel_trng_resume(struct device *dev) { struct atmel_trng *trng = dev_get_drvdata(dev); + int ret; - return clk_prepare_enable(trng->clk); + ret = clk_prepare_enable(trng->clk); + if (ret) + return ret; + + atmel_trng_enable(trng); + + return 0; } static const struct dev_pm_ops atmel_trng_pm_ops = { diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index d2d2c89..f976641 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -92,6 +92,7 @@ static void add_early_randomness(struct hwrng *rng) mutex_unlock(&reading_mutex); if (bytes_read > 0) add_device_randomness(rng_buffer, bytes_read); + memset(rng_buffer, 0, size); } static inline void cleanup_rng(struct kref *kref) @@ -287,6 +288,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + memset(rng_buffer, 0, rng_buffer_size()); return ret ? : err; out_unlock_reading: @@ -425,6 +427,7 @@ static int hwrng_fillfn(void *unused) /* Outside lock, sure, but y'know: randomness. 
*/ add_hwgenerator_randomness((void *)rng_fillbuf, rc, rc * current_quality * 8 >> 10); + memset(rng_fillbuf, 0, rng_buffer_size()); } hwrng_fill = NULL; return 0; diff --git a/drivers/char/hw_random/meson-rng.c b/drivers/char/hw_random/meson-rng.c index 58bef39..119d698 100644 --- a/drivers/char/hw_random/meson-rng.c +++ b/drivers/char/hw_random/meson-rng.c @@ -110,6 +110,7 @@ static const struct of_device_id meson_rng_of_match[] = { { .compatible = "amlogic,meson-rng", }, {}, }; +MODULE_DEVICE_TABLE(of, meson_rng_of_match); static struct platform_driver meson_rng_driver = { .probe = meson_rng_probe, @@ -121,7 +122,6 @@ static struct platform_driver meson_rng_driver = { module_platform_driver(meson_rng_driver); -MODULE_ALIAS("platform:meson-rng"); MODULE_DESCRIPTION("Meson H/W Random Number Generator driver"); MODULE_AUTHOR("Lawrence Mok <lawrence.mok@amlogic.com>"); MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>"); diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c index 96fb986..841fee8 100644 --- a/drivers/char/hw_random/msm-rng.c +++ b/drivers/char/hw_random/msm-rng.c @@ -90,10 +90,6 @@ static int msm_rng_read(struct hwrng *hwrng, void *data, size_t max, bool wait) /* calculate max size bytes to transfer back to caller */ maxsize = min_t(size_t, MAX_HW_FIFO_SIZE, max); - /* no room for word data */ - if (maxsize < WORD_SZ) - return 0; - ret = clk_prepare_enable(rng->clk); if (ret) return ret; diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index f5c26a5..3ad86fd 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -28,6 +28,7 @@ #include <linux/of_device.h> #include <linux/of_address.h> #include <linux/interrupt.h> +#include <linux/clk.h> #include <asm/io.h> @@ -63,10 +64,13 @@ #define OMAP2_RNG_OUTPUT_SIZE 0x4 #define OMAP4_RNG_OUTPUT_SIZE 0x8 +#define EIP76_RNG_OUTPUT_SIZE 0x10 enum { - RNG_OUTPUT_L_REG = 0, - RNG_OUTPUT_H_REG, + RNG_OUTPUT_0_REG = 0, + RNG_OUTPUT_1_REG, + RNG_OUTPUT_2_REG, + RNG_OUTPUT_3_REG, RNG_STATUS_REG, RNG_INTMASK_REG, RNG_INTACK_REG, @@ -82,7 +86,7 @@ enum { }; static const u16 reg_map_omap2[] = { - [RNG_OUTPUT_L_REG] = 0x0, + [RNG_OUTPUT_0_REG] = 0x0, [RNG_STATUS_REG] = 0x4, [RNG_CONFIG_REG] = 0x28, [RNG_REV_REG] = 0x3c, @@ -90,8 +94,8 @@ static const u16 reg_map_omap2[] = { }; static const u16 reg_map_omap4[] = { - [RNG_OUTPUT_L_REG] = 0x0, - [RNG_OUTPUT_H_REG] = 0x4, + [RNG_OUTPUT_0_REG] = 0x0, + [RNG_OUTPUT_1_REG] = 0x4, [RNG_STATUS_REG] = 0x8, [RNG_INTMASK_REG] = 0xc, [RNG_INTACK_REG] = 0x10, @@ -106,6 +110,23 @@ static const u16 reg_map_omap4[] = { [RNG_SYSCONFIG_REG] = 0x1FE4, }; +static const u16 reg_map_eip76[] = { + [RNG_OUTPUT_0_REG] = 0x0, + [RNG_OUTPUT_1_REG] = 0x4, + [RNG_OUTPUT_2_REG] = 0x8, + [RNG_OUTPUT_3_REG] = 0xc, + [RNG_STATUS_REG] = 0x10, + [RNG_INTACK_REG] = 0x10, + [RNG_CONTROL_REG] = 0x14, + [RNG_CONFIG_REG] = 0x18, + [RNG_ALARMCNT_REG] = 0x1c, + [RNG_FROENABLE_REG] = 0x20, + [RNG_FRODETUNE_REG] = 0x24, + [RNG_ALARMMASK_REG] = 0x28, + [RNG_ALARMSTOP_REG] = 0x2c, + [RNG_REV_REG] = 0x7c, +}; + struct omap_rng_dev; /** * struct omap_rng_pdata - RNG IP block-specific data @@ -127,6 +148,8 @@ struct omap_rng_dev { void __iomem *base; struct device *dev; const struct omap_rng_pdata *pdata; + struct hwrng rng; + struct clk *clk; }; static inline u32 omap_rng_read(struct omap_rng_dev *priv, u16 reg) @@ -140,41 +163,35 @@ static inline void omap_rng_write(struct omap_rng_dev *priv, u16 reg, __raw_writel(val, priv->base + 
priv->pdata->regs[reg]); } -static int omap_rng_data_present(struct hwrng *rng, int wait) + +static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max, + bool wait) { struct omap_rng_dev *priv; - int data, i; + int i, present; priv = (struct omap_rng_dev *)rng->priv; + if (max < priv->pdata->data_size) + return 0; + for (i = 0; i < 20; i++) { - data = priv->pdata->data_present(priv); - if (data || !wait) + present = priv->pdata->data_present(priv); + if (present || !wait) break; - /* RNG produces data fast enough (2+ MBit/sec, even - * during "rngtest" loads, that these delays don't - * seem to trigger. We *could* use the RNG IRQ, but - * that'd be higher overhead ... so why bother? - */ + udelay(10); } - return data; -} - -static int omap_rng_data_read(struct hwrng *rng, u32 *data) -{ - struct omap_rng_dev *priv; - u32 data_size, i; - - priv = (struct omap_rng_dev *)rng->priv; - data_size = priv->pdata->data_size; + if (!present) + return 0; - for (i = 0; i < data_size / sizeof(u32); i++) - data[i] = omap_rng_read(priv, RNG_OUTPUT_L_REG + i); + memcpy_fromio(data, priv->base + priv->pdata->regs[RNG_OUTPUT_0_REG], + priv->pdata->data_size); if (priv->pdata->regs[RNG_INTACK_REG]) omap_rng_write(priv, RNG_INTACK_REG, RNG_REG_INTACK_RDY_MASK); - return data_size; + + return priv->pdata->data_size; } static int omap_rng_init(struct hwrng *rng) @@ -193,13 +210,6 @@ static void omap_rng_cleanup(struct hwrng *rng) priv->pdata->cleanup(priv); } -static struct hwrng omap_rng_ops = { - .name = "omap", - .data_present = omap_rng_data_present, - .data_read = omap_rng_data_read, - .init = omap_rng_init, - .cleanup = omap_rng_cleanup, -}; static inline u32 omap2_rng_data_present(struct omap_rng_dev *priv) { @@ -231,6 +241,38 @@ static inline u32 omap4_rng_data_present(struct omap_rng_dev *priv) return omap_rng_read(priv, RNG_STATUS_REG) & RNG_REG_STATUS_RDY; } +static int eip76_rng_init(struct omap_rng_dev *priv) +{ + u32 val; + + /* Return if RNG is already running. */ + if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK) + return 0; + + /* Number of 512 bit blocks of raw Noise Source output data that must + * be processed by either the Conditioning Function or the + * SP 800-90 DRBG ‘BC_DF’ functionality to yield a ‘full entropy’ + * output value. 
+ */ + val = 0x5 << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT; + + /* Number of FRO samples that are XOR-ed together into one bit to be + * shifted into the main shift register + */ + val |= RNG_CONFIG_MAX_REFIL_CYCLES << RNG_CONFIG_MAX_REFIL_CYCLES_SHIFT; + omap_rng_write(priv, RNG_CONFIG_REG, val); + + /* Enable all available FROs */ + omap_rng_write(priv, RNG_FRODETUNE_REG, 0x0); + omap_rng_write(priv, RNG_FROENABLE_REG, RNG_REG_FROENABLE_MASK); + + /* Enable TRNG */ + val = RNG_CONTROL_ENABLE_TRNG_MASK; + omap_rng_write(priv, RNG_CONTROL_REG, val); + + return 0; +} + static int omap4_rng_init(struct omap_rng_dev *priv) { u32 val; @@ -300,6 +342,14 @@ static struct omap_rng_pdata omap4_rng_pdata = { .cleanup = omap4_rng_cleanup, }; +static struct omap_rng_pdata eip76_rng_pdata = { + .regs = (u16 *)reg_map_eip76, + .data_size = EIP76_RNG_OUTPUT_SIZE, + .data_present = omap4_rng_data_present, + .init = eip76_rng_init, + .cleanup = omap4_rng_cleanup, +}; + static const struct of_device_id omap_rng_of_match[] = { { .compatible = "ti,omap2-rng", @@ -309,6 +359,10 @@ static const struct of_device_id omap_rng_of_match[] = { .compatible = "ti,omap4-rng", .data = &omap4_rng_pdata, }, + { + .compatible = "inside-secure,safexcel-eip76", + .data = &eip76_rng_pdata, + }, {}, }; MODULE_DEVICE_TABLE(of, omap_rng_of_match); @@ -327,7 +381,8 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, } priv->pdata = match->data; - if (of_device_is_compatible(dev->of_node, "ti,omap4-rng")) { + if (of_device_is_compatible(dev->of_node, "ti,omap4-rng") || + of_device_is_compatible(dev->of_node, "inside-secure,safexcel-eip76")) { irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(dev, "%s: error getting IRQ resource - %d\n", @@ -343,6 +398,16 @@ static int of_get_omap_rng_device_details(struct omap_rng_dev *priv, return err; } omap_rng_write(priv, RNG_INTMASK_REG, RNG_SHUTDOWN_OFLO_MASK); + + priv->clk = of_clk_get(pdev->dev.of_node, 0); + if (IS_ERR(priv->clk) && PTR_ERR(priv->clk) == -EPROBE_DEFER) + return -EPROBE_DEFER; + if (!IS_ERR(priv->clk)) { + err = clk_prepare_enable(priv->clk); + if (err) + dev_err(&pdev->dev, "unable to enable the clk, " + "err = %d\n", err); + } } return 0; } @@ -372,7 +437,11 @@ static int omap_rng_probe(struct platform_device *pdev) if (!priv) return -ENOMEM; - omap_rng_ops.priv = (unsigned long)priv; + priv->rng.read = omap_rng_do_read; + priv->rng.init = omap_rng_init; + priv->rng.cleanup = omap_rng_cleanup; + + priv->rng.priv = (unsigned long)priv; platform_set_drvdata(pdev, priv); priv->dev = dev; @@ -383,6 +452,12 @@ static int omap_rng_probe(struct platform_device *pdev) goto err_ioremap; } + priv->rng.name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL); + if (!priv->rng.name) { + ret = -ENOMEM; + goto err_ioremap; + } + pm_runtime_enable(&pdev->dev); ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { @@ -394,20 +469,24 @@ static int omap_rng_probe(struct platform_device *pdev) ret = (dev->of_node) ? of_get_omap_rng_device_details(priv, pdev) : get_omap_rng_device_details(priv); if (ret) - goto err_ioremap; + goto err_register; - ret = hwrng_register(&omap_rng_ops); + ret = hwrng_register(&priv->rng); if (ret) goto err_register; - dev_info(&pdev->dev, "OMAP Random Number Generator ver. %02x\n", + dev_info(&pdev->dev, "Random Number Generator ver. 
%02x\n", omap_rng_read(priv, RNG_REV_REG)); return 0; err_register: priv->base = NULL; + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + + if (!IS_ERR(priv->clk)) + clk_disable_unprepare(priv->clk); err_ioremap: dev_err(dev, "initialization failed.\n"); return ret; @@ -417,13 +496,16 @@ static int omap_rng_remove(struct platform_device *pdev) { struct omap_rng_dev *priv = platform_get_drvdata(pdev); - hwrng_unregister(&omap_rng_ops); + hwrng_unregister(&priv->rng); priv->pdata->cleanup(priv); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); + if (!IS_ERR(priv->clk)) + clk_disable_unprepare(priv->clk); + return 0; } diff --git a/drivers/char/hw_random/pic32-rng.c b/drivers/char/hw_random/pic32-rng.c index 11dc9b7..9b5e68a 100644 --- a/drivers/char/hw_random/pic32-rng.c +++ b/drivers/char/hw_random/pic32-rng.c @@ -62,9 +62,6 @@ static int pic32_rng_read(struct hwrng *rng, void *buf, size_t max, u32 t; unsigned int timeout = RNG_TIMEOUT; - if (max < 8) - return 0; - do { t = readl(priv->base + RNGRCNT) & RCNT_MASK; if (t == 64) { diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c index 63ce51d..d9f46b4 100644 --- a/drivers/char/hw_random/pseries-rng.c +++ b/drivers/char/hw_random/pseries-rng.c @@ -28,7 +28,6 @@ static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) { u64 buffer[PLPAR_HCALL_BUFSIZE]; - size_t size = max < 8 ? max : 8; int rc; rc = plpar_hcall(H_RANDOM, (unsigned long *)buffer); @@ -36,10 +35,10 @@ static int pseries_rng_read(struct hwrng *rng, void *data, size_t max, bool wait pr_err_ratelimited("H_RANDOM call failed %d\n", rc); return -EIO; } - memcpy(data, buffer, size); + memcpy(data, buffer, 8); /* The hypervisor interface returns 64 bits */ - return size; + return 8; } /** diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index dae1e39..d10b4ae 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -135,8 +135,7 @@ int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size) ctx->sa_out = dma_alloc_coherent(ctx->dev->core_dev->device, size * 4, &ctx->sa_out_dma_addr, GFP_ATOMIC); if (ctx->sa_out == NULL) { - dma_free_coherent(ctx->dev->core_dev->device, - ctx->sa_len * 4, + dma_free_coherent(ctx->dev->core_dev->device, size * 4, ctx->sa_in, ctx->sa_in_dma_addr); return -ENOMEM; } diff --git a/drivers/crypto/atmel-aes-regs.h b/drivers/crypto/atmel-aes-regs.h index 6c2951b..0ec0440 100644 --- a/drivers/crypto/atmel-aes-regs.h +++ b/drivers/crypto/atmel-aes-regs.h @@ -28,6 +28,7 @@ #define AES_MR_OPMOD_CFB (0x3 << 12) #define AES_MR_OPMOD_CTR (0x4 << 12) #define AES_MR_OPMOD_GCM (0x5 << 12) +#define AES_MR_OPMOD_XTS (0x6 << 12) #define AES_MR_LOD (0x1 << 15) #define AES_MR_CFBS_MASK (0x7 << 16) #define AES_MR_CFBS_128b (0x0 << 16) @@ -67,6 +68,9 @@ #define AES_CTRR 0x98 #define AES_GCMHR(x) (0x9c + ((x) * 0x04)) +#define AES_TWR(x) (0xc0 + ((x) * 0x04)) +#define AES_ALPHAR(x) (0xd0 + ((x) * 0x04)) + #define AES_HW_VERSION 0xFC #endif /* __ATMEL_AES_REGS_H__ */ diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index e3d40a8..0e3d0d6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -36,6 +36,7 @@ #include <crypto/scatterwalk.h> #include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/xts.h> #include <crypto/internal/aead.h> #include <linux/platform_data/crypto-atmel.h> #include <dt-bindings/dma/at91.h> @@ -68,6 +69,7 @@ #define AES_FLAGS_CFB8 
(AES_MR_OPMOD_CFB | AES_MR_CFBS_8b) #define AES_FLAGS_CTR AES_MR_OPMOD_CTR #define AES_FLAGS_GCM AES_MR_OPMOD_GCM +#define AES_FLAGS_XTS AES_MR_OPMOD_XTS #define AES_FLAGS_MODE_MASK (AES_FLAGS_OPMODE_MASK | \ AES_FLAGS_ENCRYPT | \ @@ -89,6 +91,7 @@ struct atmel_aes_caps { bool has_cfb64; bool has_ctr32; bool has_gcm; + bool has_xts; u32 max_burst_size; }; @@ -135,6 +138,12 @@ struct atmel_aes_gcm_ctx { atmel_aes_fn_t ghash_resume; }; +struct atmel_aes_xts_ctx { + struct atmel_aes_base_ctx base; + + u32 key2[AES_KEYSIZE_256 / sizeof(u32)]; +}; + struct atmel_aes_reqctx { unsigned long mode; }; @@ -282,6 +291,20 @@ static const char *atmel_aes_reg_name(u32 offset, char *tmp, size_t sz) snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2); break; + case AES_TWR(0): + case AES_TWR(1): + case AES_TWR(2): + case AES_TWR(3): + snprintf(tmp, sz, "TWR[%u]", (offset - AES_TWR(0)) >> 2); + break; + + case AES_ALPHAR(0): + case AES_ALPHAR(1): + case AES_ALPHAR(2): + case AES_ALPHAR(3): + snprintf(tmp, sz, "ALPHAR[%u]", (offset - AES_ALPHAR(0)) >> 2); + break; + default: snprintf(tmp, sz, "0x%02x", offset); break; @@ -317,7 +340,7 @@ static inline void atmel_aes_write(struct atmel_aes_dev *dd, char tmp[16]; dev_vdbg(dd->dev, "write 0x%08x into %s\n", value, - atmel_aes_reg_name(offset, tmp)); + atmel_aes_reg_name(offset, tmp, sizeof(tmp))); } #endif /* VERBOSE_DEBUG */ @@ -453,15 +476,15 @@ static inline int atmel_aes_complete(struct atmel_aes_dev *dd, int err) return err; } -static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, - const u32 *iv) +static void atmel_aes_write_ctrl_key(struct atmel_aes_dev *dd, bool use_dma, + const u32 *iv, const u32 *key, int keylen) { u32 valmr = 0; /* MR register must be set before IV registers */ - if (dd->ctx->keylen == AES_KEYSIZE_128) + if (keylen == AES_KEYSIZE_128) valmr |= AES_MR_KEYSIZE_128; - else if (dd->ctx->keylen == AES_KEYSIZE_192) + else if (keylen == AES_KEYSIZE_192) valmr |= AES_MR_KEYSIZE_192; else valmr |= AES_MR_KEYSIZE_256; @@ -478,13 +501,19 @@ static void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, atmel_aes_write(dd, AES_MR, valmr); - atmel_aes_write_n(dd, AES_KEYWR(0), dd->ctx->key, - SIZE_IN_WORDS(dd->ctx->keylen)); + atmel_aes_write_n(dd, AES_KEYWR(0), key, SIZE_IN_WORDS(keylen)); if (iv && (valmr & AES_MR_OPMOD_MASK) != AES_MR_OPMOD_ECB) atmel_aes_write_block(dd, AES_IVR(0), iv); } +static inline void atmel_aes_write_ctrl(struct atmel_aes_dev *dd, bool use_dma, + const u32 *iv) + +{ + atmel_aes_write_ctrl_key(dd, use_dma, iv, + dd->ctx->key, dd->ctx->keylen); +} /* CPU transfer */ @@ -1769,6 +1798,137 @@ static struct aead_alg aes_gcm_alg = { }; +/* xts functions */ + +static inline struct atmel_aes_xts_ctx * +atmel_aes_xts_ctx_cast(struct atmel_aes_base_ctx *ctx) +{ + return container_of(ctx, struct atmel_aes_xts_ctx, base); +} + +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd); + +static int atmel_aes_xts_start(struct atmel_aes_dev *dd) +{ + struct atmel_aes_xts_ctx *ctx = atmel_aes_xts_ctx_cast(dd->ctx); + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); + struct atmel_aes_reqctx *rctx = ablkcipher_request_ctx(req); + unsigned long flags; + int err; + + atmel_aes_set_mode(dd, rctx); + + err = atmel_aes_hw_init(dd); + if (err) + return atmel_aes_complete(dd, err); + + /* Compute the tweak value from req->info with ecb(aes). 
*/ + flags = dd->flags; + dd->flags &= ~AES_FLAGS_MODE_MASK; + dd->flags |= (AES_FLAGS_ECB | AES_FLAGS_ENCRYPT); + atmel_aes_write_ctrl_key(dd, false, NULL, + ctx->key2, ctx->base.keylen); + dd->flags = flags; + + atmel_aes_write_block(dd, AES_IDATAR(0), req->info); + return atmel_aes_wait_for_data_ready(dd, atmel_aes_xts_process_data); +} + +static int atmel_aes_xts_process_data(struct atmel_aes_dev *dd) +{ + struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); + bool use_dma = (req->nbytes >= ATMEL_AES_DMA_THRESHOLD); + u32 tweak[AES_BLOCK_SIZE / sizeof(u32)]; + static const u32 one[AES_BLOCK_SIZE / sizeof(u32)] = {cpu_to_le32(1), }; + u8 *tweak_bytes = (u8 *)tweak; + int i; + + /* Read the computed ciphered tweak value. */ + atmel_aes_read_block(dd, AES_ODATAR(0), tweak); + /* + * Hardware quirk: + * the order of the ciphered tweak bytes need to be reversed before + * writing them into the ODATARx registers. + */ + for (i = 0; i < AES_BLOCK_SIZE/2; ++i) { + u8 tmp = tweak_bytes[AES_BLOCK_SIZE - 1 - i]; + + tweak_bytes[AES_BLOCK_SIZE - 1 - i] = tweak_bytes[i]; + tweak_bytes[i] = tmp; + } + + /* Process the data. */ + atmel_aes_write_ctrl(dd, use_dma, NULL); + atmel_aes_write_block(dd, AES_TWR(0), tweak); + atmel_aes_write_block(dd, AES_ALPHAR(0), one); + if (use_dma) + return atmel_aes_dma_start(dd, req->src, req->dst, req->nbytes, + atmel_aes_transfer_complete); + + return atmel_aes_cpu_start(dd, req->src, req->dst, req->nbytes, + atmel_aes_transfer_complete); +} + +static int atmel_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct atmel_aes_xts_ctx *ctx = crypto_ablkcipher_ctx(tfm); + int err; + + err = xts_check_key(crypto_ablkcipher_tfm(tfm), key, keylen); + if (err) + return err; + + memcpy(ctx->base.key, key, keylen/2); + memcpy(ctx->key2, key + keylen/2, keylen/2); + ctx->base.keylen = keylen/2; + + return 0; +} + +static int atmel_aes_xts_encrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, AES_FLAGS_XTS | AES_FLAGS_ENCRYPT); +} + +static int atmel_aes_xts_decrypt(struct ablkcipher_request *req) +{ + return atmel_aes_crypt(req, AES_FLAGS_XTS); +} + +static int atmel_aes_xts_cra_init(struct crypto_tfm *tfm) +{ + struct atmel_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + tfm->crt_ablkcipher.reqsize = sizeof(struct atmel_aes_reqctx); + ctx->base.start = atmel_aes_xts_start; + + return 0; +} + +static struct crypto_alg aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "atmel-xts-aes", + .cra_priority = ATMEL_AES_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct atmel_aes_xts_ctx), + .cra_alignmask = 0xf, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = atmel_aes_xts_cra_init, + .cra_exit = atmel_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = atmel_aes_xts_setkey, + .encrypt = atmel_aes_xts_encrypt, + .decrypt = atmel_aes_xts_decrypt, + } +}; + + /* Probe functions */ static int atmel_aes_buff_init(struct atmel_aes_dev *dd) @@ -1877,6 +2037,9 @@ static void atmel_aes_unregister_algs(struct atmel_aes_dev *dd) { int i; + if (dd->caps.has_xts) + crypto_unregister_alg(&aes_xts_alg); + if (dd->caps.has_gcm) crypto_unregister_aead(&aes_gcm_alg); @@ -1909,8 +2072,16 @@ static int atmel_aes_register_algs(struct atmel_aes_dev *dd) goto err_aes_gcm_alg; } + if (dd->caps.has_xts) 
{ + err = crypto_register_alg(&aes_xts_alg); + if (err) + goto err_aes_xts_alg; + } + return 0; +err_aes_xts_alg: + crypto_unregister_aead(&aes_gcm_alg); err_aes_gcm_alg: crypto_unregister_alg(&aes_cfb64_alg); err_aes_cfb64_alg: @@ -1928,6 +2099,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_cfb64 = 0; dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; + dd->caps.has_xts = 0; dd->caps.max_burst_size = 1; /* keep only major version number */ @@ -1937,6 +2109,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) dd->caps.has_cfb64 = 1; dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; + dd->caps.has_xts = 1; dd->caps.max_burst_size = 4; break; case 0x200: @@ -2138,7 +2311,7 @@ aes_dd_err: static int atmel_aes_remove(struct platform_device *pdev) { - static struct atmel_aes_dev *aes_dd; + struct atmel_aes_dev *aes_dd; aes_dd = platform_get_drvdata(pdev); if (!aes_dd) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 64bf302..bc0d356 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -74,7 +74,7 @@ config CRYPTO_DEV_FSL_CAAM_INTC_TIME_THLD config CRYPTO_DEV_FSL_CAAM_CRYPTO_API tristate "Register algorithm implementations with the Crypto API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_AEAD select CRYPTO_AUTHENC @@ -89,7 +89,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API config CRYPTO_DEV_FSL_CAAM_AHASH_API tristate "Register hash algorithm implementations with Crypto API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_HASH help @@ -101,7 +101,7 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API config CRYPTO_DEV_FSL_CAAM_PKC_API tristate "Register public key cryptography implementations with Crypto API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_RSA help @@ -113,7 +113,7 @@ config CRYPTO_DEV_FSL_CAAM_PKC_API config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" - depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + depends on CRYPTO_DEV_FSL_CAAM_JR default y select CRYPTO_RNG select HW_RANDOM @@ -134,3 +134,6 @@ config CRYPTO_DEV_FSL_CAAM_DEBUG help Selecting this will enable printing of various debug information in the CAAM driver. + +config CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC + def_tristate CRYPTO_DEV_FSL_CAAM_CRYPTO_API diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 08bf551..6554742 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -8,6 +8,7 @@ endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC) += caamalg_desc.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 954a64c..662fe94 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -2,6 +2,7 @@ * caam - Freescale FSL CAAM support for crypto API * * Copyright 2008-2011 Freescale Semiconductor, Inc. + * Copyright 2016 NXP * * Based on talitos crypto API driver. 
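The Atmel XTS support added above splits the supplied key into two AES keys in atmel_aes_xts_setkey() and, in atmel_aes_xts_process_data(), byte-reverses the hardware-encrypted tweak before programming it. A minimal standalone sketch of those two steps, using illustrative buffer names rather than the driver's structures:

    #include <stdint.h>
    #include <string.h>

    #define AES_BLOCK_SIZE 16

    /*
     * Split an XTS key blob in two: the first half is the data key, the
     * second half the tweak key, mirroring what atmel_aes_xts_setkey()
     * copies into ctx->base.key and ctx->key2 after xts_check_key().
     */
    static void xts_split_key(const uint8_t *key, unsigned int keylen,
                              uint8_t *data_key, uint8_t *tweak_key)
    {
            memcpy(data_key, key, keylen / 2);
            memcpy(tweak_key, key + keylen / 2, keylen / 2);
    }

    /*
     * Hardware quirk handled in atmel_aes_xts_process_data(): the ciphered
     * tweak read back from the ODATAR registers must be byte-reversed
     * before it is loaded into the TWR registers.
     */
    static void xts_reverse_tweak(uint8_t tweak[AES_BLOCK_SIZE])
    {
            int i;

            for (i = 0; i < AES_BLOCK_SIZE / 2; i++) {
                    uint8_t tmp = tweak[AES_BLOCK_SIZE - 1 - i];

                    tweak[AES_BLOCK_SIZE - 1 - i] = tweak[i];
                    tweak[i] = tmp;
            }
    }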
* @@ -53,6 +54,7 @@ #include "error.h" #include "sg_sw_sec4.h" #include "key_gen.h" +#include "caamalg_desc.h" /* * crypto alg @@ -62,8 +64,6 @@ #define CAAM_MAX_KEY_SIZE (AES_MAX_KEY_SIZE + \ CTR_RFC3686_NONCE_SIZE + \ SHA512_DIGEST_SIZE * 2) -/* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ -#define CAAM_MAX_IV_LENGTH 16 #define AEAD_DESC_JOB_IO_LEN (DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2) #define GCM_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ @@ -71,37 +71,6 @@ #define AUTHENC_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ CAAM_CMD_SZ * 5) -/* length of descriptors text */ -#define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) -#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ) -#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) -#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 9 * CAAM_CMD_SZ) - -/* Note: Nonce is counted in enckeylen */ -#define DESC_AEAD_CTR_RFC3686_LEN (4 * CAAM_CMD_SZ) - -#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) -#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ) -#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ) - -#define DESC_GCM_BASE (3 * CAAM_CMD_SZ) -#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 16 * CAAM_CMD_SZ) -#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 12 * CAAM_CMD_SZ) - -#define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) -#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) - -#define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ) -#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ) - -#define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) -#define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ - 20 * CAAM_CMD_SZ) -#define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ - 15 * CAAM_CMD_SZ) - #define DESC_MAX_USED_BYTES (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) #define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ) @@ -117,8 +86,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - struct scatterlist *sg, size_t tlen, bool ascii, - bool may_sleep) + struct scatterlist *sg, size_t tlen, bool ascii) { struct scatterlist *it; void *it_page; @@ -152,7 +120,6 @@ static struct list_head alg_list; struct caam_alg_entry { int class1_alg_type; int class2_alg_type; - int alg_op; bool rfc3686; bool geniv; }; @@ -163,52 +130,6 @@ struct caam_aead_alg { bool registered; }; -/* Set DK bit in class 1 operation if shared */ -static inline void append_dec_op1(u32 *desc, u32 type) -{ - u32 *jump_cmd, *uncond_jump_cmd; - - /* DK bit is valid only for AES */ - if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) { - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT); - return; - } - - jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD); - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT); - uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL); - set_jump_tgt_here(desc, jump_cmd); - append_operation(desc, type | OP_ALG_AS_INITFINAL | - OP_ALG_DECRYPT | OP_ALG_AAI_DK); - set_jump_tgt_here(desc, uncond_jump_cmd); -} - -/* - * For aead functions, read payload and write payload, - * both of which are specified in req->src and req->dst - */ -static inline void aead_append_src_dst(u32 *desc, u32 msg_type) -{ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | - KEY_VLF | msg_type | 
FIFOLD_TYPE_LASTBOTH); -} - -/* - * For ablkcipher encrypt and decrypt, read from req->src and - * write to req->dst - */ -static inline void ablkcipher_append_src_dst(u32 *desc) -{ - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | - KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); -} - /* * per-session context */ @@ -220,147 +141,36 @@ struct caam_ctx { dma_addr_t sh_desc_enc_dma; dma_addr_t sh_desc_dec_dma; dma_addr_t sh_desc_givenc_dma; - u32 class1_alg_type; - u32 class2_alg_type; - u32 alg_op; u8 key[CAAM_MAX_KEY_SIZE]; dma_addr_t key_dma; - unsigned int enckeylen; - unsigned int split_key_len; - unsigned int split_key_pad_len; + struct alginfo adata; + struct alginfo cdata; unsigned int authsize; }; -static void append_key_aead(u32 *desc, struct caam_ctx *ctx, - int keys_fit_inline, bool is_rfc3686) -{ - u32 *nonce; - unsigned int enckeylen = ctx->enckeylen; - - /* - * RFC3686 specific: - * | ctx->key = {AUTH_KEY, ENC_KEY, NONCE} - * | enckeylen = encryption key size + nonce size - */ - if (is_rfc3686) - enckeylen -= CTR_RFC3686_NONCE_SIZE; - - if (keys_fit_inline) { - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - append_key_as_imm(desc, (void *)ctx->key + - ctx->split_key_pad_len, enckeylen, - enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - } else { - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - append_key(desc, ctx->key_dma + ctx->split_key_pad_len, - enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - } - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u32 *)((void *)ctx->key + ctx->split_key_pad_len + - enckeylen); - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } -} - -static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, - int keys_fit_inline, bool is_rfc3686) -{ - u32 *key_jump_cmd; - - /* Note: Context registers are saved. 
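The per-session context now tracks both keys through struct alginfo (adata for the split authentication key, cdata for the cipher key), but the packed layout of ctx->key is unchanged: the padded MDHA split key comes first, then the encryption key, and for RFC3686 the 4-byte nonce is counted inside the encryption key length. A hedged sketch of the offset arithmetic; struct key_layout is illustrative, not the driver's type:

    #include <stddef.h>

    #define CTR_RFC3686_NONCE_SIZE 4

    /* Illustrative stand-in for the alginfo bookkeeping in caam_ctx. */
    struct key_layout {
            size_t auth_keylen_pad; /* padded MDHA split key (adata.keylen_pad) */
            size_t enckeylen;       /* cipher key len, incl. rfc3686 nonce */
    };

    /* The encryption key starts right after the padded split key. */
    static size_t enc_key_offset(const struct key_layout *l)
    {
            return l->auth_keylen_pad;
    }

    /* For rfc3686, the nonce occupies the last 4 bytes of the enc key. */
    static size_t rfc3686_nonce_offset(const struct key_layout *l)
    {
            return l->auth_keylen_pad + l->enckeylen - CTR_RFC3686_NONCE_SIZE;
    }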
*/ - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - set_jump_tgt_here(desc, key_jump_cmd); -} - static int aead_null_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - AEAD_DESC_JOB_IO_LEN - + ctx->adata.keylen_pad; /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - if (DESC_AEAD_NULL_ENC_LEN + AEAD_DESC_JOB_IO_LEN + - ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_AEAD_NULL_ENC_LEN) { + ctx->adata.key_inline = true; + ctx->adata.key_virt = ctx->key; + } else { + ctx->adata.key_inline = false; + ctx->adata.key_dma = ctx->key_dma; + } /* aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - set_jump_tgt_here(desc, key_jump_cmd); - - /* assoclen + cryptlen = seqinlen */ - append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Prepare to read and write cryptlen + assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. 
- */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | - MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | - MOVE_DEST_DESCBUF | - MOVE_WAITCOMP | - (0x8 << MOVE_LEN_SHIFT)); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | - MOVE_AUX_LS); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); - + cnstr_shdsc_aead_null_encap(desc, &ctx->adata, ctx->authsize); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -368,84 +178,22 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "aead null enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN + - ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - desc = ctx->sh_desc_dec; + if (rem_bytes >= DESC_AEAD_NULL_DEC_LEN) { + ctx->adata.key_inline = true; + ctx->adata.key_virt = ctx->key; + } else { + ctx->adata.key_inline = false; + ctx->adata.key_dma = ctx->key_dma; + } /* aead_decrypt shared descriptor */ - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - else - append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* assoclen + cryptlen = seqoutlen */ - append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Prepare to read and write cryptlen + assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | - MOVE_DEST_MATH2 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | - MOVE_DEST_DESCBUF | - MOVE_WAITCOMP | - (0x8 << MOVE_LEN_SHIFT)); - - /* Read and write cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* - * Insert a NOP here, since we need at least 4 instructions between - * code patching the descriptor buffer and the location being patched. 
- */ - jump_cmd = append_jump(desc, JUMP_TEST_ALL); - set_jump_tgt_here(desc, jump_cmd); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | - MOVE_AUX_LS); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Load ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | - FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); - + desc = ctx->sh_desc_dec; + cnstr_shdsc_aead_null_decap(desc, &ctx->adata, ctx->authsize); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -453,12 +201,6 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "aead null dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif return 0; } @@ -470,11 +212,11 @@ static int aead_set_sh_desc(struct crypto_aead *aead) unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline; - u32 geniv, moveiv; u32 ctx1_iv_off = 0; - u32 *desc; - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + u32 *desc, *nonce = NULL; + u32 inl_mask; + unsigned int data_len[2]; + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; @@ -482,7 +224,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) return 0; /* NULL encryption / decryption */ - if (!ctx->enckeylen) + if (!ctx->cdata.keylen) return aead_null_set_sh_desc(aead); /* @@ -497,8 +239,14 @@ static int aead_set_sh_desc(struct crypto_aead *aead) * RFC3686 specific: * CONTEXT1[255:128] = {NONCE, IV, COUNTER} */ - if (is_rfc3686) + if (is_rfc3686) { ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE; + nonce = (u32 *)((void *)ctx->key + ctx->adata.keylen_pad + + ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE); + } + + data_len[0] = ctx->adata.keylen_pad; + data_len[1] = ctx->cdata.keylen; if (alg->caam.geniv) goto skip_enc; @@ -507,54 +255,29 @@ static int aead_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_ENC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_encrypt shared descriptor */ - desc = ctx->sh_desc_enc; - - /* Note: Context registers are saved. 
*/ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - FIFOLDST_VLF); - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); + if (desc_inline_query(DESC_AEAD_ENC_LEN + + (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; + else + ctx->adata.key_dma = ctx->key_dma; - /* Read and write cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; + else + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); + /* aead_encrypt shared descriptor */ + desc = ctx->sh_desc_enc; + cnstr_shdsc_aead_encap(desc, &ctx->cdata, &ctx->adata, ctx->authsize, + is_rfc3686, nonce, ctx1_iv_off); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -562,79 +285,36 @@ static int aead_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif skip_enc: /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_DEC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_decrypt shared descriptor */ - desc = ctx->sh_desc_dec; - - /* Note: Context registers are saved. */ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - /* Class 2 operation */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); + if (desc_inline_query(DESC_AEAD_DEC_LEN + + (is_rfc3686 ? 
DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - if (alg->caam.geniv) - append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; else - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - KEY_VLF); + ctx->adata.key_dma = ctx->key_dma; - if (alg->caam.geniv) { - append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - (ctx1_iv_off << LDST_OFFSET_SHIFT)); - append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); - } - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Choose operation */ - if (ctr_mode) - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; else - append_dec_op1(desc, ctx->class1_alg_type); - - /* Read and write cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG); + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* Load ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | - FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); + /* aead_decrypt shared descriptor */ + desc = ctx->sh_desc_dec; + cnstr_shdsc_aead_decap(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, alg->caam.geniv, is_rfc3686, + nonce, ctx1_iv_off); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -642,11 +322,6 @@ skip_enc: dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif if (!alg->caam.geniv) goto skip_givenc; @@ -655,93 +330,30 @@ skip_enc: * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_AEAD_GIVENC_LEN + AUTHENC_DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen + - (is_rfc3686 ? DESC_AEAD_CTR_RFC3686_LEN : 0) <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_enc; - - /* Note: Context registers are saved. 
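Both the encrypt and decrypt paths above now call desc_inline_query() instead of open-coding a keys_fit_inline check: the helper reports, as a bitmask, which of the two key blobs (bit 0 for the split authentication key, bit 1 for the cipher key) still fits inline in the 64-word shared descriptor, and the caller then fills in key_virt or key_dma accordingly. The helper itself lives in the new descriptor library; the function below is only an illustrative sketch of that greedy fit check, not the driver's implementation:

    #include <stddef.h>

    /*
     * Illustrative inline-fit query: 'rem_bytes' is what remains of the
     * 64-word shared descriptor once the fixed commands and the job
     * descriptor I/O section are accounted for.  Bit i of the result is
     * set when data_len[i] can still be inlined; otherwise the caller
     * must reference that key by DMA address instead.
     */
    static unsigned int inline_fit_mask(size_t rem_bytes,
                                        const size_t *data_len, size_t count)
    {
            unsigned int mask = 0;
            size_t i;

            for (i = 0; i < count; i++) {
                    if (data_len[i] <= rem_bytes) {
                            mask |= 1u << i;
                            rem_bytes -= data_len[i];
                    }
            }

            return mask;
    }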
*/ - init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); - - if (is_rfc3686) - goto copy_iv; - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (ivsize << MOVE_LEN_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - -copy_iv: - /* Copy IV to class 1 context */ - append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | - (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (ivsize << MOVE_LEN_SHIFT)); - - /* Return to encryption */ - append_operation(desc, ctx->class2_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Read and write assoclen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc before reading payload */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | - KEY_VLF); - - /* Copy iv from outfifo to class 2 fifo */ - moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | - NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | - LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Will write ivsize + cryptlen */ - append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + if (desc_inline_query(DESC_AEAD_GIVENC_LEN + + (is_rfc3686 ? 
DESC_AEAD_CTR_RFC3686_LEN : 0), + AUTHENC_DESC_JOB_IO_LEN, data_len, &inl_mask, + ARRAY_SIZE(data_len)) < 0) + return -EINVAL; - /* Not need to reload iv */ - append_seq_fifo_load(desc, ivsize, - FIFOLD_CLASS_SKIP); + if (inl_mask & 1) + ctx->adata.key_virt = ctx->key; + else + ctx->adata.key_dma = ctx->key_dma; - /* Will read cryptlen */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + if (inl_mask & 2) + ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad; + else + ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad; - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); + ctx->adata.key_inline = !!(inl_mask & 1); + ctx->cdata.key_inline = !!(inl_mask & 2); + /* aead_givencrypt shared descriptor */ + desc = ctx->sh_desc_enc; + cnstr_shdsc_aead_givencap(desc, &ctx->cdata, &ctx->adata, ivsize, + ctx->authsize, is_rfc3686, nonce, + ctx1_iv_off); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -749,11 +361,6 @@ copy_iv: dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "aead givenc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif skip_givenc: return 0; @@ -774,12 +381,11 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd, *zero_payload_jump_cmd, - *zero_assoc_jump_cmd1, *zero_assoc_jump_cmd2; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -787,82 +393,16 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_GCM_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* skip key loading if they are loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD | JUMP_COND_SELF); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* if assoclen + cryptlen is ZERO, skip to ICV write */ - append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - /* if assoclen is ZERO, skip reading the assoc data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* cryptlen = seqinlen - assoclen */ - append_math_sub(desc, VARSEQOUTLEN, 
SEQINLEN, REG3, CAAM_CMD_SZ); - - /* if cryptlen is ZERO jump to zero-payload commands */ - zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* write encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - - /* jump the zero-payload commands */ - append_jump(desc, JUMP_TEST_ALL | 2); - - /* zero-payload commands */ - set_jump_tgt_here(desc, zero_payload_jump_cmd); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); - - /* There is no input data */ - set_jump_tgt_here(desc, zero_assoc_jump_cmd2); - - /* write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - + cnstr_shdsc_gcm_encap(desc, &ctx->cdata, ctx->authsize); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -870,80 +410,21 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "gcm enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_GCM_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* skip key loading if they are loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD | - JUMP_COND_SELF); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* if assoclen is ZERO, skip reading the assoc data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - - /* cryptlen = seqoutlen - assoclen */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* jump to zero-payload command if cryptlen is zero */ - zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* store encrypted data */ 
- append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* zero-payload command */ - set_jump_tgt_here(desc, zero_payload_jump_cmd); - - /* read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - + cnstr_shdsc_gcm_decap(desc, &ctx->cdata, ctx->authsize); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -951,11 +432,6 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "gcm dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif return 0; } @@ -974,11 +450,11 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -986,62 +462,16 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4106_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - /* Skip IV */ - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Will read cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* cryptlen = seqoutlen - assoclen */ - append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ); - - /* Write encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* Read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - + cnstr_shdsc_rfc4106_encap(desc, &ctx->cdata, ctx->authsize); 
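Every *_set_sh_desc() variant now ends the same way: once the cnstr_shdsc_*() helper has built the shared descriptor, it is mapped for the device and the mapping is checked before the DMA address is kept. A condensed sketch of that map-and-check tail, assuming the usual job-ring device pointer; desc_bytes_len stands in for desc_bytes(desc):

    #include <linux/device.h>
    #include <linux/dma-mapping.h>
    #include <linux/errno.h>

    /*
     * Map a freshly constructed shared descriptor for the CAAM engine and
     * report failure before the DMA address is published, mirroring the
     * tail of the reworked *_set_sh_desc() helpers.
     */
    static int map_shared_desc(struct device *jrdev, void *desc,
                               size_t desc_bytes_len, dma_addr_t *sh_desc_dma)
    {
            dma_addr_t dma;

            dma = dma_map_single(jrdev, desc, desc_bytes_len, DMA_TO_DEVICE);
            if (dma_mapping_error(jrdev, dma)) {
                    dev_err(jrdev, "unable to map shared descriptor\n");
                    return -ENOMEM;
            }

            *sh_desc_dma = dma;
            return 0;
    }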
ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1049,73 +479,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4106 enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_RFC4106_DEC_LEN + DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4106_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - /* Skip IV */ - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Will read cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ); - - /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); - - /* Skip assoc data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - - /* Will write cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Store payload data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* Read encrypted data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - - /* Read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - + cnstr_shdsc_rfc4106_decap(desc, &ctx->cdata, ctx->authsize); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1123,11 +501,6 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4106 dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif return 0; } @@ -1147,12 +520,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) { struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; - bool keys_fit_inline = false; - u32 *key_jump_cmd; - u32 *read_move_cmd, *write_move_cmd; u32 *desc; + int rem_bytes = CAAM_DESC_BYTES_MAX - GCM_DESC_JOB_IO_LEN - + ctx->cdata.keylen; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->cdata.keylen || !ctx->authsize) return 0; /* @@ -1160,61 +532,16 @@ static int rfc4543_set_sh_desc(struct 
crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4543_ENC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_enc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* assoclen + cryptlen = seqinlen */ - append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | - (0x8 << MOVE_LEN_SHIFT)); - - /* Will read assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Will write assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Read and write assoclen + cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_AAD); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move payload data to OFIFO */ - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - + cnstr_shdsc_rfc4543_encap(desc, &ctx->cdata, ctx->authsize); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1222,77 +549,21 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4543 enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer */ - keys_fit_inline = false; - if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; + if (rem_bytes >= DESC_RFC4543_DEC_LEN) { + ctx->cdata.key_inline = true; + ctx->cdata.key_virt = ctx->key; + } else { + ctx->cdata.key_inline = false; + ctx->cdata.key_dma = ctx->key_dma; + } desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | - JUMP_TEST_ALL | JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Class 1 operation */ - append_operation(desc, 
ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - - /* assoclen + cryptlen = seqoutlen */ - append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | - (0x8 << MOVE_LEN_SHIFT)); - - /* Will read assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Will write assoclen + cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - - /* Store payload data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* In-snoop assoclen + cryptlen data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move payload data to OFIFO */ - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Read ICV */ - append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); - + cnstr_shdsc_rfc4543_decap(desc, &ctx->cdata, ctx->authsize); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1300,11 +571,6 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "rfc4543 dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif return 0; } @@ -1320,19 +586,9 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, return 0; } -static u32 gen_split_aead_key(struct caam_ctx *ctx, const u8 *key_in, - u32 authkeylen) -{ - return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, - ctx->split_key_pad_len, key_in, authkeylen, - ctx->alg_op); -} - static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ - static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; struct crypto_authenc_keys keys; @@ -1341,33 +597,25 @@ static int aead_setkey(struct crypto_aead *aead, if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) goto badkey; - /* Pick class 2 key length from algorithm submask */ - ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> - OP_ALG_ALGSEL_SHIFT] * 2; - ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); - - if (ctx->split_key_pad_len + keys.enckeylen > CAAM_MAX_KEY_SIZE) - goto badkey; - #ifdef DEBUG printk(KERN_ERR "keylen %d enckeylen %d authkeylen %d\n", keys.authkeylen + keys.enckeylen, keys.enckeylen, keys.authkeylen); - printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", - ctx->split_key_len, ctx->split_key_pad_len); print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); #endif - ret = gen_split_aead_key(ctx, keys.authkey, keys.authkeylen); + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, keys.authkey, + keys.authkeylen, CAAM_MAX_KEY_SIZE - + 
keys.enckeylen); if (ret) { goto badkey; } /* postpend encryption key to auth split key */ - memcpy(ctx->key + ctx->split_key_pad_len, keys.enckey, keys.enckeylen); + memcpy(ctx->key + ctx->adata.keylen_pad, keys.enckey, keys.enckeylen); - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len + + ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad + keys.enckeylen, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->key_dma)) { dev_err(jrdev, "unable to map key i/o memory\n"); @@ -1376,14 +624,14 @@ static int aead_setkey(struct crypto_aead *aead, #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->split_key_pad_len + keys.enckeylen, 1); + ctx->adata.keylen_pad + keys.enckeylen, 1); #endif - ctx->enckeylen = keys.enckeylen; + ctx->cdata.keylen = keys.enckeylen; ret = aead_set_sh_desc(aead); if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len + + dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad + keys.enckeylen, DMA_TO_DEVICE); } @@ -1412,11 +660,11 @@ static int gcm_setkey(struct crypto_aead *aead, dev_err(jrdev, "unable to map key i/o memory\n"); return -ENOMEM; } - ctx->enckeylen = keylen; + ctx->cdata.keylen = keylen; ret = gcm_set_sh_desc(aead); if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, + dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, DMA_TO_DEVICE); } @@ -1444,9 +692,9 @@ static int rfc4106_setkey(struct crypto_aead *aead, * The last four bytes of the key material are used as the salt value * in the nonce. Update the AES key length. */ - ctx->enckeylen = keylen - 4; + ctx->cdata.keylen = keylen - 4; - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, + ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->key_dma)) { dev_err(jrdev, "unable to map key i/o memory\n"); @@ -1455,7 +703,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, ret = rfc4106_set_sh_desc(aead); if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, + dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, DMA_TO_DEVICE); } @@ -1483,9 +731,9 @@ static int rfc4543_setkey(struct crypto_aead *aead, * The last four bytes of the key material are used as the salt value * in the nonce. Update the AES key length. 
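As the rfc4106/rfc4543 setkey comments above state, the last four bytes of the supplied key material are not part of the AES key: they are the salt that init_gcm_job() later appends from ctx->key + ctx->cdata.keylen, which is why both setkey paths store keylen - 4 as the cipher key length. A small illustrative split, with an assumed salt-size constant, copying the two parts out rather than adjusting a length in place as the driver does:

    #include <stdint.h>
    #include <string.h>

    #define GCM_RFC4106_SALT_SIZE 4    /* illustrative name for the 4-byte salt */

    /*
     * rfc4106/rfc4543 key material = AES key || 4-byte salt.  Return the
     * effective AES key length (what the driver keeps in cdata.keylen);
     * the salt trails the key and ends up in the nonce.
     */
    static unsigned int rfc4106_split_key(const uint8_t *key_material,
                                          unsigned int keylen,
                                          uint8_t *aes_key, uint8_t *salt)
    {
            unsigned int aes_keylen = keylen - GCM_RFC4106_SALT_SIZE;

            memcpy(aes_key, key_material, aes_keylen);
            memcpy(salt, key_material + aes_keylen, GCM_RFC4106_SALT_SIZE);

            return aes_keylen;
    }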
*/ - ctx->enckeylen = keylen - 4; + ctx->cdata.keylen = keylen - 4; - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->enckeylen, + ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->cdata.keylen, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->key_dma)) { dev_err(jrdev, "unable to map key i/o memory\n"); @@ -1494,7 +742,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, ret = rfc4543_set_sh_desc(aead); if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->enckeylen, + dma_unmap_single(jrdev, ctx->key_dma, ctx->cdata.keylen, DMA_TO_DEVICE); } @@ -1505,21 +753,18 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, const u8 *key, unsigned int keylen) { struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); - struct ablkcipher_tfm *crt = &ablkcipher->base.crt_ablkcipher; struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablkcipher); const char *alg_name = crypto_tfm_alg_name(tfm); struct device *jrdev = ctx->jrdev; - int ret = 0; - u32 *key_jump_cmd; + unsigned int ivsize = crypto_ablkcipher_ivsize(ablkcipher); u32 *desc; - u8 *nonce; - u32 geniv; u32 ctx1_iv_off = 0; - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = (ctr_mode && (strstr(alg_name, "rfc3686") != NULL)); + memcpy(ctx->key, key, keylen); #ifdef DEBUG print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); @@ -1542,60 +787,20 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, keylen -= CTR_RFC3686_NONCE_SIZE; } - memcpy(ctx->key, key, keylen); ctx->key_dma = dma_map_single(jrdev, ctx->key, keylen, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->key_dma)) { dev_err(jrdev, "unable to map key i/o memory\n"); return -ENOMEM; } - ctx->enckeylen = keylen; + ctx->cdata.keylen = keylen; + ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_inline = true; /* ablkcipher_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); - - /* Load nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - - set_jump_tgt_here(desc, key_jump_cmd); - - /* Load iv */ - append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - + cnstr_shdsc_ablkcipher_encap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1603,61 +808,11 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*ablkcipher, dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher enc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif + /* ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); - - /* Load nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - - set_jump_tgt_here(desc, key_jump_cmd); - - /* load IV */ - append_seq_load(desc, crt->ivsize, LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - /* Choose operation */ - if (ctr_mode) - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT); - else - append_dec_op1(desc, ctx->class1_alg_type); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - + cnstr_shdsc_ablkcipher_decap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1666,76 +821,10 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher dec shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif /* ablkcipher_givencrypt shared descriptor */ desc = ctx->sh_desc_givenc; - - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | - KEY_DEST_CLASS_REG); - - /* Load Nonce into CONTEXT1 reg */ - if (is_rfc3686) { - nonce = (u8 *)key + keylen; - append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, - LDST_CLASS_IND_CCB | - LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_OUTFIFO | - MOVE_DEST_CLASS1CTX | - (16 << MOVE_OFFSET_SHIFT) | - (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); - } - set_jump_tgt_here(desc, key_jump_cmd); - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (crt->ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - append_move(desc, MOVE_WAITCOMP | - MOVE_SRC_INFIFO | - MOVE_DEST_CLASS1CTX | - (crt->ivsize << MOVE_LEN_SHIFT) | - (ctx1_iv_off << MOVE_OFFSET_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Copy generated IV to memory */ - append_seq_store(desc, crt->ivsize, - LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | 
- (ctx1_iv_off << LDST_OFFSET_SHIFT)); - - /* Load Counter into CONTEXT1 reg */ - if (is_rfc3686) - append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT | - ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << - LDST_OFFSET_SHIFT)); - - if (ctx1_iv_off) - append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP | - (1 << JUMP_OFFSET_SHIFT)); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - + cnstr_shdsc_ablkcipher_givencap(desc, &ctx->cdata, ivsize, is_rfc3686, + ctx1_iv_off); ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -1743,14 +832,8 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - return ret; + return 0; } static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, @@ -1758,8 +841,7 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, { struct caam_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher); struct device *jrdev = ctx->jrdev; - u32 *key_jump_cmd, *desc; - __be64 sector_size = cpu_to_be64(512); + u32 *desc; if (keylen != 2 * AES_MIN_KEY_SIZE && keylen != 2 * AES_MAX_KEY_SIZE) { crypto_ablkcipher_set_flags(ablkcipher, @@ -1774,88 +856,23 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, dev_err(jrdev, "unable to map key i/o memory\n"); return -ENOMEM; } - ctx->enckeylen = keylen; + ctx->cdata.keylen = keylen; + ctx->cdata.key_virt = ctx->key; + ctx->cdata.key_inline = true; /* xts_ablkcipher_encrypt shared descriptor */ desc = ctx->sh_desc_enc; - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 keys only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - - /* Load sector size with index 40 bytes (0x28) */ - append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8); - append_data(desc, (void *)§or_size, 8); - - set_jump_tgt_here(desc, key_jump_cmd); - - /* - * create sequence for loading the sector index - * Upper 8B of IV - will be used as sector index - * Lower 8B of IV - will be discarded - */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8); - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Load operation */ - append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | - OP_ALG_ENCRYPT); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - + cnstr_shdsc_xts_ablkcipher_encap(desc, &ctx->cdata); ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif /* xts_ablkcipher_decrypt shared descriptor */ desc = ctx->sh_desc_dec; - - init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); - /* Skip if already shared */ - 
key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - /* Load class1 key only */ - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - - /* Load sector size with index 40 bytes (0x28) */ - append_cmd(desc, CMD_LOAD | IMMEDIATE | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x28 << LDST_OFFSET_SHIFT) | 8); - append_data(desc, (void *)&sector_size, 8); - - set_jump_tgt_here(desc, key_jump_cmd); - - /* - * create sequence for loading the sector index - * Upper 8B of IV - will be used as sector index - * Lower 8B of IV - will be discarded - */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_1_CCB | (0x20 << LDST_OFFSET_SHIFT) | 8); - append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); - - /* Load operation */ - append_dec_op1(desc, ctx->class1_alg_type); - - /* Perform operation */ - ablkcipher_append_src_dst(desc); - + cnstr_shdsc_xts_ablkcipher_decap(desc, &ctx->cdata); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { @@ -1864,31 +881,22 @@ static int xts_ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); -#endif return 0; } /* * aead_edesc - s/w-extended aead descriptor - * @assoc_nents: number of segments in associated data (SPI+Seq) scatterlist * @src_nents: number of segments in input scatterlist * @dst_nents: number of segments in output scatterlist - * @iv_dma: dma address of iv for checking continuity and link table - * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table + * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct aead_edesc { - int assoc_nents; int src_nents; int dst_nents; - dma_addr_t iv_dma; int sec4_sg_bytes; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; @@ -1900,9 +908,9 @@ struct aead_edesc { * @src_nents: number of segments in input scatterlist * @dst_nents: number of segments in output scatterlist * @iv_dma: dma address of iv for checking continuity and link table - * @desc: h/w descriptor (variable length; must not exceed MAX_CAAM_DESCSIZE) * @sec4_sg_bytes: length of dma mapped sec4_sg space * @sec4_sg_dma: bus physical mapped address of h/w link table + * @sec4_sg: pointer to h/w link table * @hw_desc: the h/w job descriptor followed by any referenced link tables */ struct ablkcipher_edesc { @@ -2019,8 +1027,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ablkcipher_edesc *)((char *)desc - - offsetof(struct ablkcipher_edesc, hw_desc)); + edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -2031,7 +1038,7 @@ static void ablkcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err, edesc->src_nents > 1 ? 100 : ivsize, 1); dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 
100 : req->nbytes, 1); #endif ablkcipher_unmap(jrdev, edesc, req); @@ -2052,8 +1059,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ablkcipher_edesc *)((char *)desc - - offsetof(struct ablkcipher_edesc, hw_desc)); + edesc = container_of(desc, struct ablkcipher_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -2063,7 +1069,7 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, ivsize, 1); dbg_dump_sg(KERN_ERR, "dst @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->dst, - edesc->dst_nents > 1 ? 100 : req->nbytes, 1, true); + edesc->dst_nents > 1 ? 100 : req->nbytes, 1); #endif ablkcipher_unmap(jrdev, edesc, req); @@ -2157,7 +1163,7 @@ static void init_gcm_job(struct aead_request *req, FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | 12 | last); /* Append Salt */ if (!generic_gcm) - append_data(desc, ctx->key + ctx->enckeylen, 4); + append_data(desc, ctx->key + ctx->cdata.keylen, 4); /* Append IV */ append_data(desc, req->iv, ivsize); /* End of blank commands */ @@ -2172,7 +1178,7 @@ static void init_authenc_job(struct aead_request *req, struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); - const bool ctr_mode = ((ctx->class1_alg_type & OP_ALG_AAI_MASK) == + const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; u32 *desc = edesc->hw_desc; @@ -2218,15 +1224,13 @@ static void init_ablkcipher_job(u32 *sh_desc, dma_addr_t ptr, int len, sec4_sg_index = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); print_hex_dump(KERN_ERR, "presciv@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); printk(KERN_ERR "asked=%d, nbytes%d\n", (int)edesc->src_nents ? 100 : req->nbytes, req->nbytes); dbg_dump_sg(KERN_ERR, "src @"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1, may_sleep); + edesc->src_nents ? 100 : req->nbytes, 1); #endif len = desc_len(sh_desc); @@ -2278,14 +1282,12 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, int len, sec4_sg_index = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); print_hex_dump(KERN_ERR, "presciv@" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->info, ivsize, 1); dbg_dump_sg(KERN_ERR, "src @" __stringify(__LINE__) ": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - edesc->src_nents ? 100 : req->nbytes, 1, may_sleep); + edesc->src_nents ? 100 : req->nbytes, 1); #endif len = desc_len(sh_desc); @@ -2344,10 +1346,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, /* Check if data are contiguous. */ all_contig = !src_nents; - if (!all_contig) { - src_nents = src_nents ? 
: 1; + if (!all_contig) sec4_sg_len = src_nents; - } sec4_sg_len += dst_nents; @@ -2556,11 +1556,9 @@ static int aead_decrypt(struct aead_request *req) int ret = 0; #ifdef DEBUG - bool may_sleep = ((req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | - CRYPTO_TFM_REQ_MAY_SLEEP)) != 0); dbg_dump_sg(KERN_ERR, "dec src@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, req->src, - req->assoclen + req->cryptlen, 1, may_sleep); + req->assoclen + req->cryptlen, 1); #endif /* allocate extended descriptor */ @@ -2618,16 +1616,33 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request if (likely(req->src == req->dst)) { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_BIDIRECTIONAL); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } } else { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE); + return ERR_PTR(-ENOMEM); + } } iv_dma = dma_map_single(jrdev, req->info, ivsize, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, iv_dma)) { dev_err(jrdev, "unable to map IV\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2647,6 +1662,8 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2673,6 +1690,9 @@ static struct ablkcipher_edesc *ablkcipher_edesc_alloc(struct ablkcipher_request sec4_sg_bytes, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { dev_err(jrdev, "unable to map S/G table\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); + kfree(edesc); return ERR_PTR(-ENOMEM); } @@ -2794,11 +1814,26 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( if (likely(req->src == req->dst)) { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_BIDIRECTIONAL); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } } else { sgc = dma_map_sg(jrdev, req->src, src_nents ? : 1, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + return ERR_PTR(-ENOMEM); + } + sgc = dma_map_sg(jrdev, req->dst, dst_nents ? : 1, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg(jrdev, req->src, src_nents ? 
: 1, + DMA_TO_DEVICE); + return ERR_PTR(-ENOMEM); + } } /* @@ -2808,6 +1843,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( iv_dma = dma_map_single(jrdev, greq->giv, ivsize, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, iv_dma)) { dev_err(jrdev, "unable to map IV\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, 0, + 0, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2823,6 +1860,8 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( GFP_DMA | flags); if (!edesc) { dev_err(jrdev, "could not allocate extended descriptor\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); return ERR_PTR(-ENOMEM); } @@ -2850,6 +1889,9 @@ static struct ablkcipher_edesc *ablkcipher_giv_edesc_alloc( sec4_sg_bytes, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { dev_err(jrdev, "unable to map S/G table\n"); + caam_unmap(jrdev, req->src, req->dst, src_nents, dst_nents, + iv_dma, ivsize, 0, 0); + kfree(edesc); return ERR_PTR(-ENOMEM); } edesc->iv_dma = iv_dma; @@ -2916,7 +1958,6 @@ struct caam_alg_template { } template_u; u32 class1_alg_type; u32 class2_alg_type; - u32 alg_op; }; static struct caam_alg_template driver_algs[] = { @@ -3101,7 +2142,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3123,7 +2163,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3145,7 +2184,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3167,7 +2205,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3189,7 +2226,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3211,7 +2247,6 @@ static struct caam_aead_alg driver_aeads[] = { .caam = { .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3233,7 +2268,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3256,7 +2290,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3279,7 +2312,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3302,7 +2334,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3325,7 +2356,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES 
| OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3348,7 +2378,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3371,7 +2400,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3394,7 +2422,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3417,7 +2444,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3440,7 +2466,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3463,7 +2488,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3486,7 +2510,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3509,7 +2532,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, } }, { @@ -3532,7 +2554,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, } }, @@ -3556,7 +2577,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3580,7 +2600,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3604,7 +2623,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3628,7 +2646,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3652,7 +2669,6 @@ static struct caam_aead_alg driver_aeads[] = { 
.class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3676,7 +2692,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3700,7 +2715,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -3724,7 +2738,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3748,7 +2761,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -3772,7 +2784,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_3DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3795,7 +2806,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }, { @@ -3818,7 +2828,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3841,7 +2850,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, }, { @@ -3864,7 +2872,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3887,7 +2894,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, }, { @@ -3910,7 +2916,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3933,7 +2938,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, }, { @@ -3956,7 +2960,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -3979,7 +2982,6 @@ static 
struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, }, { @@ -4002,7 +3004,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -4025,7 +3026,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, }, { @@ -4048,7 +3048,6 @@ static struct caam_aead_alg driver_aeads[] = { .class1_alg_type = OP_ALG_ALGSEL_DES | OP_ALG_AAI_CBC, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .geniv = true, }, }, @@ -4073,7 +3072,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4098,7 +3096,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4124,7 +3121,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4149,7 +3145,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4175,7 +3170,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4200,7 +3194,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4226,7 +3219,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4251,7 +3243,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4277,7 +3268,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4302,7 +3292,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4328,7 +3317,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = 
OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .rfc3686 = true, }, }, @@ -4353,7 +3341,6 @@ static struct caam_aead_alg driver_aeads[] = { OP_ALG_AAI_CTR_MOD128, .class2_alg_type = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC_PRECOMP, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, .rfc3686 = true, .geniv = true, }, @@ -4375,9 +3362,8 @@ static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam) } /* copy descriptor header template value */ - ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; - ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; - ctx->alg_op = OP_TYPE_CLASS2_ALG | caam->alg_op; + ctx->cdata.algtype = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; + ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; return 0; } @@ -4420,7 +3406,7 @@ static void caam_exit_common(struct caam_ctx *ctx) if (ctx->key_dma && !dma_mapping_error(ctx->jrdev, ctx->key_dma)) dma_unmap_single(ctx->jrdev, ctx->key_dma, - ctx->enckeylen + ctx->split_key_pad_len, + ctx->cdata.keylen + ctx->adata.keylen_pad, DMA_TO_DEVICE); caam_jr_free(ctx->jrdev); @@ -4498,7 +3484,6 @@ static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template t_alg->caam.class1_alg_type = template->class1_alg_type; t_alg->caam.class2_alg_type = template->class2_alg_type; - t_alg->caam.alg_op = template->alg_op; return t_alg; } diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c new file mode 100644 index 0000000..f3f48c1 --- /dev/null +++ b/drivers/crypto/caam/caamalg_desc.c @@ -0,0 +1,1306 @@ +/* + * Shared descriptors for aead, ablkcipher algorithms + * + * Copyright 2016 NXP + */ + +#include "compat.h" +#include "desc_constr.h" +#include "caamalg_desc.h" + +/* + * For aead functions, read payload and write payload, + * both of which are specified in req->src and req->dst + */ +static inline void aead_append_src_dst(u32 *desc, u32 msg_type) +{ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | + KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH); +} + +/* Set DK bit in class 1 operation if shared */ +static inline void append_dec_op1(u32 *desc, u32 type) +{ + u32 *jump_cmd, *uncond_jump_cmd; + + /* DK bit is valid only for AES */ + if ((type & OP_ALG_ALGSEL_MASK) != OP_ALG_ALGSEL_AES) { + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + return; + } + + jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_SHRD); + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + uncond_jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, jump_cmd); + append_operation(desc, type | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_AAI_DK); + set_jump_tgt_here(desc, uncond_jump_cmd); +} + +/** + * cnstr_shdsc_aead_null_encap - IPSec ESP encapsulation shared descriptor + * (non-protocol) with no (null) encryption. + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * + * Note: Requires an MDHA split key. 
+ */ +void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | KEY_DEST_MDHA_SPLIT | + KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* assoclen + cryptlen = seqinlen */ + append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Prepare to read and write cryptlen + assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_null_encap); + +/** + * cnstr_shdsc_aead_null_decap - IPSec ESP decapsulation shared descriptor + * (non-protocol) with no (null) decryption. + * @desc: pointer to buffer used for descriptor construction + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * + * Note: Requires an MDHA split key. 
+ */ +void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd, *jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* assoclen + cryptlen = seqoutlen */ + append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Prepare to read and write cryptlen + assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH2 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* + * Insert a NOP here, since we need at least 4 instructions between + * code patching the descriptor buffer and the location being patched. + */ + jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, jump_cmd); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Load ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_null_decap); + +static void init_sh_desc_key_aead(u32 * const desc, + struct alginfo * const cdata, + struct alginfo * const adata, + const bool is_rfc3686, u32 *nonce) +{ + u32 *key_jump_cmd; + unsigned int enckeylen = cdata->keylen; + + /* Note: Context registers are saved. 
*/ + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* + * RFC3686 specific: + * | key = {AUTH_KEY, ENC_KEY, NONCE} + * | enckeylen = encryption key size + nonce size + */ + if (is_rfc3686) + enckeylen -= CTR_RFC3686_NONCE_SIZE; + + if (adata->key_inline) + append_key_as_imm(desc, adata->key_virt, adata->keylen_pad, + adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, adata->key_dma, adata->keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, enckeylen, + enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, enckeylen, CLASS_1 | + KEY_DEST_CLASS_REG); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) { + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, + MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | + (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); +} + +/** + * cnstr_shdsc_aead_encap - IPSec ESP encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int icvsize, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off) +{ + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + FIFOLDST_VLF); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG1OUT2); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_encap); + +/** + * cnstr_shdsc_aead_decap - IPSec ESP decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @ivsize: initialization vector size + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool geniv, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off) +{ + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + /* Class 2 operation */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + if (geniv) + append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize); + else + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + if (geniv) { + append_seq_load(desc, ivsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + (ctx1_iv_off << LDST_OFFSET_SHIFT)); + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize); + } + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Choose operation */ + if (ctx1_iv_off) + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + else + append_dec_op1(desc, cdata->algtype); + + /* Read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + aead_append_src_dst(desc, FIFOLD_TYPE_MSG); + + /* Load ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_decap); + +/** + * cnstr_shdsc_aead_givencap - IPSec ESP encapsulation shared descriptor + * (non-protocol) with HW-generated initialization + * vector. + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. + * @adata: pointer to authentication transform definitions. Note that since a + * split key is to be used, the size of the split key itself is + * specified. Valid algorithm values - one of OP_ALG_ALGSEL_{MD5, SHA1, + * SHA224, SHA256, SHA384, SHA512} ANDed with OP_ALG_AAI_HMAC_PRECOMP. + * @ivsize: initialization vector size + * @icvsize: integrity check value (ICV) size (truncated or full) + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @nonce: pointer to rfc3686 nonce + * @ctx1_iv_off: IV offset in CONTEXT1 register + * + * Note: Requires an MDHA split key. + */ +void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool is_rfc3686, + u32 *nonce, const u32 ctx1_iv_off) +{ + u32 geniv, moveiv; + + /* Note: Context registers are saved. 
*/ + init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce); + + if (is_rfc3686) + goto copy_iv; + + /* Generate IV */ + geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | + NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_WAITCOMP | + MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | + (ivsize << MOVE_LEN_SHIFT)); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + +copy_iv: + /* Copy IV to class 1 context */ + append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | + (ctx1_iv_off << MOVE_OFFSET_SHIFT) | + (ivsize << MOVE_LEN_SHIFT)); + + /* Return to encryption */ + append_operation(desc, adata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Read and write assoclen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + /* Copy iv from outfifo to class 2 fifo */ + moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | + NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | + LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Will write ivsize + cryptlen */ + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Not need to reload iv */ + append_seq_fifo_load(desc, ivsize, + FIFOLD_CLASS_SKIP); + + /* Will read cryptlen */ + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | + FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead givenc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_aead_givencap); + +/** + * cnstr_shdsc_gcm_encap - gcm encapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1, + *zero_assoc_jump_cmd2; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* skip key loading if they are loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD | JUMP_COND_SELF); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* if assoclen + cryptlen is ZERO, skip to ICV write */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* cryptlen = seqinlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ); + + /* if cryptlen is ZERO jump to zero-payload commands */ + zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + set_jump_tgt_here(desc, zero_assoc_jump_cmd1); + + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* write encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + + /* jump the zero-payload commands */ + append_jump(desc, JUMP_TEST_ALL | 2); + + /* zero-payload commands */ + set_jump_tgt_here(desc, zero_payload_jump_cmd); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); + + /* There is no input data */ + set_jump_tgt_here(desc, zero_assoc_jump_cmd2); + + /* write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "gcm enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_gcm_encap); + +/** + * cnstr_shdsc_gcm_decap - gcm decapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *zero_payload_jump_cmd, *zero_assoc_jump_cmd1; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* skip key loading if they are loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | + JUMP_TEST_ALL | JUMP_COND_SHRD | + JUMP_COND_SELF); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + set_jump_tgt_here(desc, zero_assoc_jump_cmd1); + + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* jump to zero-payload command if cryptlen is zero */ + zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* store encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* zero-payload command */ + set_jump_tgt_here(desc, zero_payload_jump_cmd); + + /* read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, "gcm dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_gcm_decap); + +/** + * cnstr_shdsc_rfc4106_encap - IPSec ESP gcm encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + /* Skip IV */ + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Will read cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ); + + /* Write encrypted data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* Read payload data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4106 enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4106_encap); + +/** + * cnstr_shdsc_rfc4106_decap - IPSec ESP gcm decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. 
+ * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* Read assoc data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + + /* Skip IV */ + append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP); + + /* Will read cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ); + + /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG); + + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* Will write cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Store payload data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* Read encrypted data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | + FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* Read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4106 dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4106_decap); + +/** + * cnstr_shdsc_rfc4543_encap - IPSec ESP gmac encapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. + * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* assoclen + cryptlen = seqinlen */ + append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. 
+ */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | + (0x8 << MOVE_LEN_SHIFT)); + + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Read and write assoclen + cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_AAD); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + /* Move payload data to OFIFO */ + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); + + /* Write ICV */ + append_seq_store(desc, icvsize, LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4543 enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4543_encap); + +/** + * cnstr_shdsc_rfc4543_decap - IPSec ESP gmac decapsulation shared descriptor + * (non-protocol). + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_GCM. + * @icvsize: integrity check value (ICV) size (truncated or full) + */ +void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize) +{ + u32 *key_jump_cmd, *read_move_cmd, *write_move_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip key loading if it is loaded due to sharing */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (cdata->key_inline) + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + else + append_key(desc, cdata->key_dma, cdata->keylen, CLASS_1 | + KEY_DEST_CLASS_REG); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 1 operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* assoclen + cryptlen = seqoutlen */ + append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. 
+ */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | + (0x8 << MOVE_LEN_SHIFT)); + + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* Store payload data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); + + /* In-snoop assoclen + cryptlen data */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | + FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + /* Move payload data to OFIFO */ + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Read ICV */ + append_seq_fifo_load(desc, icvsize, FIFOLD_CLASS_CLASS1 | + FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "rfc4543 dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_rfc4543_decap); + +/* + * For ablkcipher encrypt and decrypt, read from req->src and + * write to req->dst + */ +static inline void ablkcipher_append_src_dst(u32 *desc) +{ + append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | + KEY_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); +} + +/** + * cnstr_shdsc_ablkcipher_encap - ablkcipher encapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. 
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); + + /* Load iv */ + append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Load operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher enc shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_encap); + +/** + * cnstr_shdsc_ablkcipher_decap - ablkcipher decapsulation shared descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC or OP_ALG_AAI_CTR_MOD128. 
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + + set_jump_tgt_here(desc, key_jump_cmd); + + /* load IV */ + append_seq_load(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + /* Choose operation */ + if (ctx1_iv_off) + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_DECRYPT); + else + append_dec_op1(desc, cdata->algtype); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher dec shdesc@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_decap); + +/** + * cnstr_shdsc_ablkcipher_givencap - ablkcipher encapsulation shared descriptor + * with HW-generated initialization vector. + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - one of OP_ALG_ALGSEL_{AES, DES, 3DES} ANDed + * with OP_ALG_AAI_CBC. 
+ * @ivsize: initialization vector size + * @is_rfc3686: true when ctr(aes) is wrapped by rfc3686 template + * @ctx1_iv_off: IV offset in CONTEXT1 register + */ +void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off) +{ + u32 *key_jump_cmd, geniv; + + init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX); + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + + /* Load class1 key only */ + append_key_as_imm(desc, cdata->key_virt, cdata->keylen, + cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG); + + /* Load Nonce into CONTEXT1 reg */ + if (is_rfc3686) { + u8 *nonce = cdata->key_virt + cdata->keylen; + + append_load_as_imm(desc, nonce, CTR_RFC3686_NONCE_SIZE, + LDST_CLASS_IND_CCB | + LDST_SRCDST_BYTE_OUTFIFO | LDST_IMM); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_OUTFIFO | + MOVE_DEST_CLASS1CTX | (16 << MOVE_OFFSET_SHIFT) | + (CTR_RFC3686_NONCE_SIZE << MOVE_LEN_SHIFT)); + } + set_jump_tgt_here(desc, key_jump_cmd); + + /* Generate IV */ + geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | + NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | NFIFOENTRY_PTYPE_RND | + (ivsize << NFIFOENTRY_DLEN_SHIFT); + append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | + LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO | + MOVE_DEST_CLASS1CTX | (ivsize << MOVE_LEN_SHIFT) | + (ctx1_iv_off << MOVE_OFFSET_SHIFT)); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Copy generated IV to memory */ + append_seq_store(desc, ivsize, LDST_SRCDST_BYTE_CONTEXT | + LDST_CLASS_1_CCB | (ctx1_iv_off << LDST_OFFSET_SHIFT)); + + /* Load Counter into CONTEXT1 reg */ + if (is_rfc3686) + append_load_imm_be32(desc, 1, LDST_IMM | LDST_CLASS_1_CCB | + LDST_SRCDST_BYTE_CONTEXT | + ((ctx1_iv_off + CTR_RFC3686_IV_SIZE) << + LDST_OFFSET_SHIFT)); + + if (ctx1_iv_off) + append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NCP | + (1 << JUMP_OFFSET_SHIFT)); + + /* Load operation */ + append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL | + OP_ALG_ENCRYPT); + + /* Perform operation */ + ablkcipher_append_src_dst(desc); + +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "ablkcipher givenc shdesc@" __stringify(__LINE__) ": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1); +#endif +} +EXPORT_SYMBOL(cnstr_shdsc_ablkcipher_givencap); + +/** + * cnstr_shdsc_xts_ablkcipher_encap - xts ablkcipher encapsulation shared + * descriptor + * @desc: pointer to buffer used for descriptor construction + * @cdata: pointer to block cipher transform definitions + * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS. 
+ */
+void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata)
+{
+ __be64 sector_size = cpu_to_be64(512);
+ u32 *key_jump_cmd;
+
+ init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+ /* Skip if already shared */
+ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+ JUMP_COND_SHRD);
+
+ /* Load class1 keys only */
+ append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+ cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+ /* Load sector size with index 40 bytes (0x28) */
+ append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+ LDST_SRCDST_BYTE_CONTEXT |
+ (0x28 << LDST_OFFSET_SHIFT));
+
+ set_jump_tgt_here(desc, key_jump_cmd);
+
+ /*
+ * create sequence for loading the sector index
+ * Upper 8B of IV - will be used as sector index
+ * Lower 8B of IV - will be discarded
+ */
+ append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+ (0x20 << LDST_OFFSET_SHIFT));
+ append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+ /* Load operation */
+ append_operation(desc, cdata->algtype | OP_ALG_AS_INITFINAL |
+ OP_ALG_ENCRYPT);
+
+ /* Perform operation */
+ ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+ print_hex_dump(KERN_ERR,
+ "xts ablkcipher enc shdesc@" __stringify(__LINE__) ": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_encap);
+
+/**
+ * cnstr_shdsc_xts_ablkcipher_decap - xts ablkcipher decapsulation shared
+ * descriptor
+ * @desc: pointer to buffer used for descriptor construction
+ * @cdata: pointer to block cipher transform definitions
+ * Valid algorithm values - OP_ALG_ALGSEL_AES ANDed with OP_ALG_AAI_XTS.
+ */
+void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata)
+{
+ __be64 sector_size = cpu_to_be64(512);
+ u32 *key_jump_cmd;
+
+ init_sh_desc(desc, HDR_SHARE_SERIAL | HDR_SAVECTX);
+ /* Skip if already shared */
+ key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL |
+ JUMP_COND_SHRD);
+
+ /* Load class1 key only */
+ append_key_as_imm(desc, cdata->key_virt, cdata->keylen,
+ cdata->keylen, CLASS_1 | KEY_DEST_CLASS_REG);
+
+ /* Load sector size with index 40 bytes (0x28) */
+ append_load_as_imm(desc, (void *)&sector_size, 8, LDST_CLASS_1_CCB |
+ LDST_SRCDST_BYTE_CONTEXT |
+ (0x28 << LDST_OFFSET_SHIFT));
+
+ set_jump_tgt_here(desc, key_jump_cmd);
+
+ /*
+ * create sequence for loading the sector index
+ * Upper 8B of IV - will be used as sector index
+ * Lower 8B of IV - will be discarded
+ */
+ append_seq_load(desc, 8, LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB |
+ (0x20 << LDST_OFFSET_SHIFT));
+ append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
+
+ /* Load operation */
+ append_dec_op1(desc, cdata->algtype);
+
+ /* Perform operation */
+ ablkcipher_append_src_dst(desc);
+
+#ifdef DEBUG
+ print_hex_dump(KERN_ERR,
+ "xts ablkcipher dec shdesc@" __stringify(__LINE__) ": ",
+ DUMP_PREFIX_ADDRESS, 16, 4, desc, desc_bytes(desc), 1);
+#endif
+}
+EXPORT_SYMBOL(cnstr_shdsc_xts_ablkcipher_decap);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FSL CAAM descriptor support");
+MODULE_AUTHOR("Freescale Semiconductor - NMG/STC");
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
new file mode 100644
index 0000000..9555173
--- /dev/null
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -0,0 +1,97 @@
+/*
+ * Shared descriptors for aead, ablkcipher algorithms
+ *
+ * Copyright 2016 NXP
+ */
+
+#ifndef _CAAMALG_DESC_H_
+#define _CAAMALG_DESC_H_
+
+/* length of descriptors text */
+#define DESC_AEAD_BASE (4 * 
CAAM_CMD_SZ) +#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 11 * CAAM_CMD_SZ) +#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) +#define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) + +/* Note: Nonce is counted in cdata.keylen */ +#define DESC_AEAD_CTR_RFC3686_LEN (4 * CAAM_CMD_SZ) + +#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 11 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 13 * CAAM_CMD_SZ) + +#define DESC_GCM_BASE (3 * CAAM_CMD_SZ) +#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 16 * CAAM_CMD_SZ) +#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 12 * CAAM_CMD_SZ) + +#define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) +#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) +#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ) + +#define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) +#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ) +#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ) + +#define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) +#define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ + 20 * CAAM_CMD_SZ) +#define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ + 15 * CAAM_CMD_SZ) + +void cnstr_shdsc_aead_null_encap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize); + +void cnstr_shdsc_aead_null_decap(u32 * const desc, struct alginfo *adata, + unsigned int icvsize); + +void cnstr_shdsc_aead_encap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int icvsize, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off); + +void cnstr_shdsc_aead_decap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool geniv, + const bool is_rfc3686, u32 *nonce, + const u32 ctx1_iv_off); + +void cnstr_shdsc_aead_givencap(u32 * const desc, struct alginfo *cdata, + struct alginfo *adata, unsigned int ivsize, + unsigned int icvsize, const bool is_rfc3686, + u32 *nonce, const u32 ctx1_iv_off); + +void cnstr_shdsc_gcm_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_gcm_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4106_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4543_encap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_rfc4543_decap(u32 * const desc, struct alginfo *cdata, + unsigned int icvsize); + +void cnstr_shdsc_ablkcipher_encap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_ablkcipher_decap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_ablkcipher_givencap(u32 * const desc, struct alginfo *cdata, + unsigned int ivsize, const bool is_rfc3686, + const u32 ctx1_iv_off); + +void cnstr_shdsc_xts_ablkcipher_encap(u32 * const desc, struct alginfo *cdata); + +void cnstr_shdsc_xts_ablkcipher_decap(u32 * const desc, struct alginfo *cdata); + +#endif /* _CAAMALG_DESC_H_ */ diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 660dc20..e58639e 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -72,7 +72,7 @@ #define CAAM_MAX_HASH_DIGEST_SIZE 
SHA512_DIGEST_SIZE /* length of descriptors text */ -#define DESC_AHASH_BASE (4 * CAAM_CMD_SZ) +#define DESC_AHASH_BASE (3 * CAAM_CMD_SZ) #define DESC_AHASH_UPDATE_LEN (6 * CAAM_CMD_SZ) #define DESC_AHASH_UPDATE_FIRST_LEN (DESC_AHASH_BASE + 4 * CAAM_CMD_SZ) #define DESC_AHASH_FINAL_LEN (DESC_AHASH_BASE + 5 * CAAM_CMD_SZ) @@ -103,20 +103,15 @@ struct caam_hash_ctx { u32 sh_desc_update_first[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_fin[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; u32 sh_desc_digest[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; - u32 sh_desc_finup[DESC_HASH_MAX_USED_LEN] ____cacheline_aligned; dma_addr_t sh_desc_update_dma ____cacheline_aligned; dma_addr_t sh_desc_update_first_dma; dma_addr_t sh_desc_fin_dma; dma_addr_t sh_desc_digest_dma; - dma_addr_t sh_desc_finup_dma; struct device *jrdev; - u32 alg_type; - u32 alg_op; u8 key[CAAM_MAX_HASH_KEY_SIZE]; dma_addr_t key_dma; int ctx_len; - unsigned int split_key_len; - unsigned int split_key_pad_len; + struct alginfo adata; }; /* ahash state */ @@ -222,89 +217,54 @@ static inline int ctx_map_to_sec4_sg(u32 *desc, struct device *jrdev, return 0; } -/* Common shared descriptor commands */ -static inline void append_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) -{ - append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, - ctx->split_key_len, CLASS_2 | - KEY_DEST_MDHA_SPLIT | KEY_ENC); -} - -/* Append key if it has been set */ -static inline void init_sh_desc_key_ahash(u32 *desc, struct caam_hash_ctx *ctx) -{ - u32 *key_jump_cmd; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - if (ctx->split_key_len) { - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_ahash(desc, ctx); - - set_jump_tgt_here(desc, key_jump_cmd); - } - - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); -} - /* - * For ahash read data from seqin following state->caam_ctx, - * and write resulting class2 context to seqout, which may be state->caam_ctx - * or req->result + * For ahash update, final and finup (import_ctx = true) + * import context, read and write to seqout + * For ahash firsts and digest (import_ctx = false) + * read and write to seqout */ -static inline void ahash_append_load_str(u32 *desc, int digestsize) +static inline void ahash_gen_sh_desc(u32 *desc, u32 state, int digestsize, + struct caam_hash_ctx *ctx, bool import_ctx) { - /* Calculate remaining bytes to read */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Read remaining bytes */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | - FIFOLD_TYPE_MSG | KEY_VLF); + u32 op = ctx->adata.algtype; + u32 *skip_key_load; - /* Store class2 context bytes */ - append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | - LDST_SRCDST_BYTE_CONTEXT); -} + init_sh_desc(desc, HDR_SHARE_SERIAL); -/* - * For ahash update, final and finup, import context, read and write to seqout - */ -static inline void ahash_ctx_data_to_out(u32 *desc, u32 op, u32 state, - int digestsize, - struct caam_hash_ctx *ctx) -{ - init_sh_desc_key_ahash(desc, ctx); + /* Append key if it has been set; ahash update excluded */ + if ((state != OP_ALG_AS_UPDATE) && (ctx->adata.keylen)) { + /* Skip key loading if already shared */ + skip_key_load = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); - /* Import context from software */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_2_CCB | ctx->ctx_len); + 
append_key_as_imm(desc, ctx->key, ctx->adata.keylen_pad, + ctx->adata.keylen, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); - /* Class 2 operation */ - append_operation(desc, op | state | OP_ALG_ENCRYPT); + set_jump_tgt_here(desc, skip_key_load); - /* - * Load from buf and/or src and write to req->result or state->context - */ - ahash_append_load_str(desc, digestsize); -} + op |= OP_ALG_AAI_HMAC_PRECOMP; + } -/* For ahash firsts and digest, read and write to seqout */ -static inline void ahash_data_to_out(u32 *desc, u32 op, u32 state, - int digestsize, struct caam_hash_ctx *ctx) -{ - init_sh_desc_key_ahash(desc, ctx); + /* If needed, import context from software */ + if (import_ctx) + append_seq_load(desc, ctx->ctx_len, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); /* Class 2 operation */ append_operation(desc, op | state | OP_ALG_ENCRYPT); /* * Load from buf and/or src and write to req->result or state->context + * Calculate remaining bytes to read */ - ahash_append_load_str(desc, digestsize); + append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + /* Read remaining bytes */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | + FIFOLD_TYPE_MSG | KEY_VLF); + /* Store class2 context bytes */ + append_seq_store(desc, digestsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); } static int ahash_set_sh_desc(struct crypto_ahash *ahash) @@ -312,28 +272,11 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; - u32 have_key = 0; u32 *desc; - if (ctx->split_key_len) - have_key = OP_ALG_AAI_HMAC_PRECOMP; - /* ahash_update shared descriptor */ desc = ctx->sh_desc_update; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Import context from software */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | - LDST_CLASS_2_CCB | ctx->ctx_len); - - /* Class 2 operation */ - append_operation(desc, ctx->alg_type | OP_ALG_AS_UPDATE | - OP_ALG_ENCRYPT); - - /* Load data and write to result or context */ - ahash_append_load_str(desc, ctx->ctx_len); - + ahash_gen_sh_desc(desc, OP_ALG_AS_UPDATE, ctx->ctx_len, ctx, true); ctx->sh_desc_update_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->sh_desc_update_dma)) { @@ -348,10 +291,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_update_first shared descriptor */ desc = ctx->sh_desc_update_first; - - ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INIT, - ctx->ctx_len, ctx); - + ahash_gen_sh_desc(desc, OP_ALG_AS_INIT, ctx->ctx_len, ctx, false); ctx->sh_desc_update_first_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -367,10 +307,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) /* ahash_final shared descriptor */ desc = ctx->sh_desc_fin; - - ahash_ctx_data_to_out(desc, have_key | ctx->alg_type, - OP_ALG_AS_FINALIZE, digestsize, ctx); - + ahash_gen_sh_desc(desc, OP_ALG_AS_FINALIZE, digestsize, ctx, true); ctx->sh_desc_fin_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->sh_desc_fin_dma)) { @@ -383,30 +320,9 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) desc_bytes(desc), 1); #endif - /* ahash_finup shared descriptor */ - desc = ctx->sh_desc_finup; - - ahash_ctx_data_to_out(desc, have_key | ctx->alg_type, - OP_ALG_AS_FINALIZE, digestsize, ctx); - - ctx->sh_desc_finup_dma = dma_map_single(jrdev, desc, 
desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_finup_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, "ahash finup shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - /* ahash_digest shared descriptor */ desc = ctx->sh_desc_digest; - - ahash_data_to_out(desc, have_key | ctx->alg_type, OP_ALG_AS_INITFINAL, - digestsize, ctx); - + ahash_gen_sh_desc(desc, OP_ALG_AS_INITFINAL, digestsize, ctx, false); ctx->sh_desc_digest_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); @@ -424,14 +340,6 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) return 0; } -static int gen_split_hash_key(struct caam_hash_ctx *ctx, const u8 *key_in, - u32 keylen) -{ - return gen_split_key(ctx->jrdev, ctx->key, ctx->split_key_len, - ctx->split_key_pad_len, key_in, keylen, - ctx->alg_op); -} - /* Digest hash size if it is too large */ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, u32 *keylen, u8 *key_out, u32 digestsize) @@ -467,7 +375,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, } /* Job descriptor to perform unkeyed hash on key_in */ - append_operation(desc, ctx->alg_type | OP_ALG_ENCRYPT | + append_operation(desc, ctx->adata.algtype | OP_ALG_ENCRYPT | OP_ALG_AS_INITFINAL); append_seq_in_ptr(desc, src_dma, *keylen, 0); append_seq_fifo_load(desc, *keylen, FIFOLD_CLASS_CLASS2 | @@ -511,8 +419,6 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, const u8 *key_in, static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ - static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); struct device *jrdev = ctx->jrdev; int blocksize = crypto_tfm_alg_blocksize(&ahash->base); @@ -537,23 +443,12 @@ static int ahash_setkey(struct crypto_ahash *ahash, key = hashed_key; } - /* Pick class 2 key length from algorithm submask */ - ctx->split_key_len = mdpadlen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> - OP_ALG_ALGSEL_SHIFT] * 2; - ctx->split_key_pad_len = ALIGN(ctx->split_key_len, 16); - -#ifdef DEBUG - printk(KERN_ERR "split_key_len %d split_key_pad_len %d\n", - ctx->split_key_len, ctx->split_key_pad_len); - print_hex_dump(KERN_ERR, "key in @"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1); -#endif - - ret = gen_split_hash_key(ctx, key, keylen); + ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key, keylen, + CAAM_MAX_HASH_KEY_SIZE); if (ret) goto bad_free_key; - ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->split_key_pad_len, + ctx->key_dma = dma_map_single(jrdev, ctx->key, ctx->adata.keylen_pad, DMA_TO_DEVICE); if (dma_mapping_error(jrdev, ctx->key_dma)) { dev_err(jrdev, "unable to map key i/o memory\n"); @@ -563,14 +458,15 @@ static int ahash_setkey(struct crypto_ahash *ahash, #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, ctx->key, - ctx->split_key_pad_len, 1); + ctx->adata.keylen_pad, 1); #endif ret = ahash_set_sh_desc(ahash); if (ret) { - dma_unmap_single(jrdev, ctx->key_dma, ctx->split_key_pad_len, + dma_unmap_single(jrdev, ctx->key_dma, ctx->adata.keylen_pad, DMA_TO_DEVICE); } + error_free_key: kfree(hashed_key); return ret; @@ -639,8 +535,7 @@ static void ahash_done(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 
0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -674,8 +569,7 @@ static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -709,8 +603,7 @@ static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -744,8 +637,7 @@ static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err, dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); #endif - edesc = (struct ahash_edesc *)((char *)desc - - offsetof(struct ahash_edesc, hw_desc)); + edesc = container_of(desc, struct ahash_edesc, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -1078,7 +970,7 @@ static int ahash_finup_ctx(struct ahash_request *req) /* allocate space for base edesc and hw desc commands, link tables */ edesc = ahash_edesc_alloc(ctx, sec4_sg_src_index + mapped_nents, - ctx->sh_desc_finup, ctx->sh_desc_finup_dma, + ctx->sh_desc_fin, ctx->sh_desc_fin_dma, flags); if (!edesc) { dma_unmap_sg(jrdev, req->src, src_nents, DMA_TO_DEVICE); @@ -1683,7 +1575,6 @@ struct caam_hash_template { unsigned int blocksize; struct ahash_alg template_ahash; u32 alg_type; - u32 alg_op; }; /* ahash descriptors */ @@ -1709,7 +1600,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA1, - .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, }, { .name = "sha224", .driver_name = "sha224-caam", @@ -1731,7 +1621,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA224, - .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, }, { .name = "sha256", .driver_name = "sha256-caam", @@ -1753,7 +1642,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA256, - .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, }, { .name = "sha384", .driver_name = "sha384-caam", @@ -1775,7 +1663,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA384, - .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, }, { .name = "sha512", .driver_name = "sha512-caam", @@ -1797,7 +1684,6 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_SHA512, - .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, { .name = "md5", .driver_name = "md5-caam", @@ -1819,14 +1705,12 @@ static struct caam_hash_template driver_hash[] = { }, }, .alg_type = OP_ALG_ALGSEL_MD5, - .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, }, }; struct caam_hash_alg { struct list_head entry; int alg_type; - int alg_op; struct ahash_alg ahash_alg; }; @@ -1859,10 +1743,10 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) return PTR_ERR(ctx->jrdev); } /* copy descriptor header template value */ - ctx->alg_type = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; - ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_hash->alg_op; + ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type; - ctx->ctx_len = 
runninglen[(ctx->alg_op & OP_ALG_ALGSEL_SUBMASK) >> + ctx->ctx_len = runninglen[(ctx->adata.algtype & + OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT]; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), @@ -1893,10 +1777,6 @@ static void caam_hash_cra_exit(struct crypto_tfm *tfm) dma_unmap_single(ctx->jrdev, ctx->sh_desc_digest_dma, desc_bytes(ctx->sh_desc_digest), DMA_TO_DEVICE); - if (ctx->sh_desc_finup_dma && - !dma_mapping_error(ctx->jrdev, ctx->sh_desc_finup_dma)) - dma_unmap_single(ctx->jrdev, ctx->sh_desc_finup_dma, - desc_bytes(ctx->sh_desc_finup), DMA_TO_DEVICE); caam_jr_free(ctx->jrdev); } @@ -1956,7 +1836,6 @@ caam_hash_alloc(struct caam_hash_template *template, alg->cra_type = &crypto_ahash_type; t_alg->alg_type = template->alg_type; - t_alg->alg_op = template->alg_op; return t_alg; } diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 851015e..32100c4 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -395,7 +395,7 @@ static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct rsa_key raw_key = {0}; + struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -441,7 +441,7 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct rsa_key raw_key = {0}; + struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 9b92af2..41398da 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -52,7 +52,7 @@ /* length of descriptors */ #define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2) -#define DESC_RNG_LEN (4 * CAAM_CMD_SZ) +#define DESC_RNG_LEN (3 * CAAM_CMD_SZ) /* Buffer, its dma address and lock */ struct buf_data { @@ -100,8 +100,7 @@ static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context) { struct buf_data *bd; - bd = (struct buf_data *)((char *)desc - - offsetof(struct buf_data, hw_desc)); + bd = container_of(desc, struct buf_data, hw_desc[0]); if (err) caam_jr_strstatus(jrdev, err); @@ -196,9 +195,6 @@ static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx) init_sh_desc(desc, HDR_SHARE_SERIAL); - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); - /* Generate random bytes */ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG); @@ -351,7 +347,7 @@ static int __init caam_rng_init(void) pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(dev); } - rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA); + rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL); if (!rng_ctx) { err = -ENOMEM; goto free_caam_alloc; diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index e483b78..7551098 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -330,8 +330,8 @@ static int caam_remove(struct platform_device *pdev) clk_disable_unprepare(ctrlpriv->caam_ipg); clk_disable_unprepare(ctrlpriv->caam_mem); clk_disable_unprepare(ctrlpriv->caam_aclk); - clk_disable_unprepare(ctrlpriv->caam_emi_slow); - + if (ctrlpriv->caam_emi_slow) + clk_disable_unprepare(ctrlpriv->caam_emi_slow); return 0; } @@ -365,11 +365,8 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) */ val = (rd_reg32(&r4tst->rtsdctl) & 
RTSDCTL_ENT_DLY_MASK) >> RTSDCTL_ENT_DLY_SHIFT; - if (ent_delay <= val) { - /* put RNG4 into run mode */ - clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); - return; - } + if (ent_delay <= val) + goto start_rng; val = rd_reg32(&r4tst->rtsdctl); val = (val & ~RTSDCTL_ENT_DLY_MASK) | @@ -381,15 +378,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE); /* read the control register */ val = rd_reg32(&r4tst->rtmctl); +start_rng: /* * select raw sampling in both entropy shifter - * and statistical checker + * and statistical checker; ; put RNG4 into run mode */ - clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); - /* put RNG4 into run mode */ - clrsetbits_32(&val, RTMCTL_PRGM, 0); - /* write back the control register */ - wr_reg32(&r4tst->rtmctl, val); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC); } /** @@ -482,14 +476,16 @@ static int caam_probe(struct platform_device *pdev) } ctrlpriv->caam_aclk = clk; - clk = caam_drv_identify_clk(&pdev->dev, "emi_slow"); - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - dev_err(&pdev->dev, - "can't identify CAAM emi_slow clk: %d\n", ret); - return ret; + if (!of_machine_is_compatible("fsl,imx6ul")) { + clk = caam_drv_identify_clk(&pdev->dev, "emi_slow"); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); + dev_err(&pdev->dev, + "can't identify CAAM emi_slow clk: %d\n", ret); + return ret; + } + ctrlpriv->caam_emi_slow = clk; } - ctrlpriv->caam_emi_slow = clk; ret = clk_prepare_enable(ctrlpriv->caam_ipg); if (ret < 0) { @@ -510,11 +506,13 @@ static int caam_probe(struct platform_device *pdev) goto disable_caam_mem; } - ret = clk_prepare_enable(ctrlpriv->caam_emi_slow); - if (ret < 0) { - dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n", - ret); - goto disable_caam_aclk; + if (ctrlpriv->caam_emi_slow) { + ret = clk_prepare_enable(ctrlpriv->caam_emi_slow); + if (ret < 0) { + dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n", + ret); + goto disable_caam_aclk; + } } /* Get configuration properties from device tree */ @@ -541,13 +539,13 @@ static int caam_probe(struct platform_device *pdev) else BLOCK_OFFSET = PG_SIZE_64K; - ctrlpriv->ctrl = (struct caam_ctrl __force *)ctrl; - ctrlpriv->assure = (struct caam_assurance __force *) - ((uint8_t *)ctrl + + ctrlpriv->ctrl = (struct caam_ctrl __iomem __force *)ctrl; + ctrlpriv->assure = (struct caam_assurance __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * ASSURE_BLOCK_NUMBER ); - ctrlpriv->deco = (struct caam_deco __force *) - ((uint8_t *)ctrl + + ctrlpriv->deco = (struct caam_deco __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * DECO_BLOCK_NUMBER ); @@ -627,8 +625,8 @@ static int caam_probe(struct platform_device *pdev) ring); continue; } - ctrlpriv->jr[ring] = (struct caam_job_ring __force *) - ((uint8_t *)ctrl + + ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *) + ((__force uint8_t *)ctrl + (ring + JR_BLOCK_NUMBER) * BLOCK_OFFSET ); @@ -641,8 +639,8 @@ static int caam_probe(struct platform_device *pdev) !!(rd_reg32(&ctrl->perfmon.comp_parms_ms) & CTPR_MS_QI_MASK); if (ctrlpriv->qi_present) { - ctrlpriv->qi = (struct caam_queue_if __force *) - ((uint8_t *)ctrl + + ctrlpriv->qi = (struct caam_queue_if __iomem __force *) + ((__force uint8_t *)ctrl + BLOCK_OFFSET * QI_BLOCK_NUMBER ); /* This is all that's required to physically enable QI */ @@ -800,7 +798,7 @@ static int caam_probe(struct platform_device *pdev) &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure 
mode only) */ - ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; + ctrlpriv->ctl_kek_wrap.data = (__force void *)&ctrlpriv->ctrl->kek[0]; ctrlpriv->ctl_kek_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_kek = debugfs_create_blob("kek", S_IRUSR | @@ -808,7 +806,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl, &ctrlpriv->ctl_kek_wrap); - ctrlpriv->ctl_tkek_wrap.data = &ctrlpriv->ctrl->tkek[0]; + ctrlpriv->ctl_tkek_wrap.data = (__force void *)&ctrlpriv->ctrl->tkek[0]; ctrlpriv->ctl_tkek_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_tkek = debugfs_create_blob("tkek", S_IRUSR | @@ -816,7 +814,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl, &ctrlpriv->ctl_tkek_wrap); - ctrlpriv->ctl_tdsk_wrap.data = &ctrlpriv->ctrl->tdsk[0]; + ctrlpriv->ctl_tdsk_wrap.data = (__force void *)&ctrlpriv->ctrl->tdsk[0]; ctrlpriv->ctl_tdsk_wrap.size = KEK_KEY_SIZE * sizeof(u32); ctrlpriv->ctl_tdsk = debugfs_create_blob("tdsk", S_IRUSR | @@ -833,7 +831,8 @@ caam_remove: iounmap_ctrl: iounmap(ctrl); disable_caam_emi_slow: - clk_disable_unprepare(ctrlpriv->caam_emi_slow); + if (ctrlpriv->caam_emi_slow) + clk_disable_unprepare(ctrlpriv->caam_emi_slow); disable_caam_aclk: clk_disable_unprepare(ctrlpriv->caam_aclk); disable_caam_mem: diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 513b664..2e6766a 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -22,12 +22,6 @@ #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ #define SEC4_SG_OFFSET_MASK 0x00001fff -struct sec4_sg_entry { - u64 ptr; - u32 len; - u32 bpid_offset; -}; - /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ #define MAX_CAAM_DESCSIZE 64 @@ -90,8 +84,8 @@ struct sec4_sg_entry { #define HDR_ZRO 0x00008000 /* Start Index or SharedDesc Length */ -#define HDR_START_IDX_MASK 0x3f #define HDR_START_IDX_SHIFT 16 +#define HDR_START_IDX_MASK (0x3f << HDR_START_IDX_SHIFT) /* If shared descriptor header, 6-bit length */ #define HDR_DESCLEN_SHR_MASK 0x3f @@ -121,10 +115,10 @@ struct sec4_sg_entry { #define HDR_PROP_DNR 0x00000800 /* JobDesc/SharedDesc share property */ -#define HDR_SD_SHARE_MASK 0x03 #define HDR_SD_SHARE_SHIFT 8 -#define HDR_JD_SHARE_MASK 0x07 +#define HDR_SD_SHARE_MASK (0x03 << HDR_SD_SHARE_SHIFT) #define HDR_JD_SHARE_SHIFT 8 +#define HDR_JD_SHARE_MASK (0x07 << HDR_JD_SHARE_SHIFT) #define HDR_SHARE_NEVER (0x00 << HDR_SD_SHARE_SHIFT) #define HDR_SHARE_WAIT (0x01 << HDR_SD_SHARE_SHIFT) @@ -235,7 +229,7 @@ struct sec4_sg_entry { #define LDST_SRCDST_WORD_DECO_MATH2 (0x0a << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DECO_AAD_SZ (0x0b << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_DECO_MATH3 (0x0b << LDST_SRCDST_SHIFT) -#define LDST_SRCDST_WORD_CLASS1_ICV_SZ (0x0c << LDST_SRCDST_SHIFT) +#define LDST_SRCDST_WORD_CLASS1_IV_SZ (0x0c << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_ALTDS_CLASS1 (0x0f << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_A_SZ (0x10 << LDST_SRCDST_SHIFT) #define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT) @@ -400,7 +394,7 @@ struct sec4_sg_entry { #define FIFOST_TYPE_PKHA_N (0x08 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_A (0x0c << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_B (0x0d << FIFOST_TYPE_SHIFT) -#define FIFOST_TYPE_AF_SBOX_JKEK (0x10 << FIFOST_TYPE_SHIFT) +#define FIFOST_TYPE_AF_SBOX_JKEK (0x20 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_AF_SBOX_TKEK (0x21 << FIFOST_TYPE_SHIFT) #define FIFOST_TYPE_PKHA_E_JKEK (0x22 << FIFOST_TYPE_SHIFT) #define 
FIFOST_TYPE_PKHA_E_TKEK (0x23 << FIFOST_TYPE_SHIFT) @@ -1107,8 +1101,8 @@ struct sec4_sg_entry { /* For non-protocol/alg-only op commands */ #define OP_ALG_TYPE_SHIFT 24 #define OP_ALG_TYPE_MASK (0x7 << OP_ALG_TYPE_SHIFT) -#define OP_ALG_TYPE_CLASS1 2 -#define OP_ALG_TYPE_CLASS2 4 +#define OP_ALG_TYPE_CLASS1 (2 << OP_ALG_TYPE_SHIFT) +#define OP_ALG_TYPE_CLASS2 (4 << OP_ALG_TYPE_SHIFT) #define OP_ALG_ALGSEL_SHIFT 16 #define OP_ALG_ALGSEL_MASK (0xff << OP_ALG_ALGSEL_SHIFT) @@ -1249,7 +1243,7 @@ struct sec4_sg_entry { #define OP_ALG_PKMODE_MOD_PRIMALITY 0x00f /* PKHA mode copy-memory functions */ -#define OP_ALG_PKMODE_SRC_REG_SHIFT 13 +#define OP_ALG_PKMODE_SRC_REG_SHIFT 17 #define OP_ALG_PKMODE_SRC_REG_MASK (7 << OP_ALG_PKMODE_SRC_REG_SHIFT) #define OP_ALG_PKMODE_DST_REG_SHIFT 10 #define OP_ALG_PKMODE_DST_REG_MASK (7 << OP_ALG_PKMODE_DST_REG_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index a8cd8a7..b9c8d98 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -33,38 +33,39 @@ extern bool caam_little_end; -static inline int desc_len(u32 *desc) +static inline int desc_len(u32 * const desc) { return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } -static inline int desc_bytes(void *desc) +static inline int desc_bytes(void * const desc) { return desc_len(desc) * CAAM_CMD_SZ; } -static inline u32 *desc_end(u32 *desc) +static inline u32 *desc_end(u32 * const desc) { return desc + desc_len(desc); } -static inline void *sh_desc_pdb(u32 *desc) +static inline void *sh_desc_pdb(u32 * const desc) { return desc + 1; } -static inline void init_desc(u32 *desc, u32 options) +static inline void init_desc(u32 * const desc, u32 options) { *desc = cpu_to_caam32((options | HDR_ONE) + 1); } -static inline void init_sh_desc(u32 *desc, u32 options) +static inline void init_sh_desc(u32 * const desc, u32 options) { PRINT_POS; init_desc(desc, CMD_SHARED_DESC_HDR | options); } -static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +static inline void init_sh_desc_pdb(u32 * const desc, u32 options, + size_t pdb_bytes) { u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; @@ -72,19 +73,20 @@ static inline void init_sh_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) options); } -static inline void init_job_desc(u32 *desc, u32 options) +static inline void init_job_desc(u32 * const desc, u32 options) { init_desc(desc, CMD_DESC_HDR | options); } -static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +static inline void init_job_desc_pdb(u32 * const desc, u32 options, + size_t pdb_bytes) { u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); } -static inline void append_ptr(u32 *desc, dma_addr_t ptr) +static inline void append_ptr(u32 * const desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); @@ -94,8 +96,8 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr) CAAM_PTR_SZ / CAAM_CMD_SZ); } -static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, - u32 options) +static inline void init_job_desc_shared(u32 * const desc, dma_addr_t ptr, + int len, u32 options) { PRINT_POS; init_job_desc(desc, HDR_SHARED | options | @@ -103,7 +105,7 @@ static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, append_ptr(desc, ptr); } -static inline void append_data(u32 *desc, void *data, int len) +static inline void append_data(u32 * const desc, void *data, int len) 
{ u32 *offset = desc_end(desc); @@ -114,7 +116,7 @@ static inline void append_data(u32 *desc, void *data, int len) (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } -static inline void append_cmd(u32 *desc, u32 command) +static inline void append_cmd(u32 * const desc, u32 command) { u32 *cmd = desc_end(desc); @@ -125,7 +127,7 @@ static inline void append_cmd(u32 *desc, u32 command) #define append_u32 append_cmd -static inline void append_u64(u32 *desc, u64 data) +static inline void append_u64(u32 * const desc, u64 data) { u32 *offset = desc_end(desc); @@ -142,14 +144,14 @@ static inline void append_u64(u32 *desc, u64 data) } /* Write command without affecting header, and return pointer to next word */ -static inline u32 *write_cmd(u32 *desc, u32 command) +static inline u32 *write_cmd(u32 * const desc, u32 command) { *desc = cpu_to_caam32(command); return desc + 1; } -static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, +static inline void append_cmd_ptr(u32 * const desc, dma_addr_t ptr, int len, u32 command) { append_cmd(desc, command | len); @@ -157,7 +159,7 @@ static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len, } /* Write length after pointer, rather than inside command */ -static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, +static inline void append_cmd_ptr_extlen(u32 * const desc, dma_addr_t ptr, unsigned int len, u32 command) { append_cmd(desc, command); @@ -166,7 +168,7 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr, append_cmd(desc, len); } -static inline void append_cmd_data(u32 *desc, void *data, int len, +static inline void append_cmd_data(u32 * const desc, void *data, int len, u32 command) { append_cmd(desc, command | IMMEDIATE | len); @@ -174,7 +176,7 @@ static inline void append_cmd_data(u32 *desc, void *data, int len, } #define APPEND_CMD_RET(cmd, op) \ -static inline u32 *append_##cmd(u32 *desc, u32 options) \ +static inline u32 *append_##cmd(u32 * const desc, u32 options) \ { \ u32 *cmd = desc_end(desc); \ PRINT_POS; \ @@ -184,13 +186,13 @@ static inline u32 *append_##cmd(u32 *desc, u32 options) \ APPEND_CMD_RET(jump, JUMP) APPEND_CMD_RET(move, MOVE) -static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) +static inline void set_jump_tgt_here(u32 * const desc, u32 *jump_cmd) { *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | (desc_len(desc) - (jump_cmd - desc))); } -static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) +static inline void set_move_tgt_here(u32 * const desc, u32 *move_cmd) { u32 val = caam32_to_cpu(*move_cmd); @@ -200,7 +202,7 @@ static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) } #define APPEND_CMD(cmd, op) \ -static inline void append_##cmd(u32 *desc, u32 options) \ +static inline void append_##cmd(u32 * const desc, u32 options) \ { \ PRINT_POS; \ append_cmd(desc, CMD_##op | options); \ @@ -208,7 +210,8 @@ static inline void append_##cmd(u32 *desc, u32 options) \ APPEND_CMD(operation, OPERATION) #define APPEND_CMD_LEN(cmd, op) \ -static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \ +static inline void append_##cmd(u32 * const desc, unsigned int len, \ + u32 options) \ { \ PRINT_POS; \ append_cmd(desc, CMD_##op | len | options); \ @@ -220,8 +223,8 @@ APPEND_CMD_LEN(seq_fifo_load, SEQ_FIFO_LOAD) APPEND_CMD_LEN(seq_fifo_store, SEQ_FIFO_STORE) #define APPEND_CMD_PTR(cmd, op) \ -static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \ - u32 options) \ +static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, 
\ + unsigned int len, u32 options) \ { \ PRINT_POS; \ append_cmd_ptr(desc, ptr, len, CMD_##op | options); \ @@ -231,8 +234,8 @@ APPEND_CMD_PTR(load, LOAD) APPEND_CMD_PTR(fifo_load, FIFO_LOAD) APPEND_CMD_PTR(fifo_store, FIFO_STORE) -static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, - u32 options) +static inline void append_store(u32 * const desc, dma_addr_t ptr, + unsigned int len, u32 options) { u32 cmd_src; @@ -249,7 +252,8 @@ static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len, } #define APPEND_SEQ_PTR_INTLEN(cmd, op) \ -static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \ +static inline void append_seq_##cmd##_ptr_intlen(u32 * const desc, \ + dma_addr_t ptr, \ unsigned int len, \ u32 options) \ { \ @@ -263,7 +267,7 @@ APPEND_SEQ_PTR_INTLEN(in, IN) APPEND_SEQ_PTR_INTLEN(out, OUT) #define APPEND_CMD_PTR_TO_IMM(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ unsigned int len, u32 options) \ { \ PRINT_POS; \ @@ -273,7 +277,7 @@ APPEND_CMD_PTR_TO_IMM(load, LOAD); APPEND_CMD_PTR_TO_IMM(fifo_load, FIFO_LOAD); #define APPEND_CMD_PTR_EXTLEN(cmd, op) \ -static inline void append_##cmd##_extlen(u32 *desc, dma_addr_t ptr, \ +static inline void append_##cmd##_extlen(u32 * const desc, dma_addr_t ptr, \ unsigned int len, u32 options) \ { \ PRINT_POS; \ @@ -287,7 +291,7 @@ APPEND_CMD_PTR_EXTLEN(seq_out_ptr, SEQ_OUT_PTR) * the size of its type */ #define APPEND_CMD_PTR_LEN(cmd, op, type) \ -static inline void append_##cmd(u32 *desc, dma_addr_t ptr, \ +static inline void append_##cmd(u32 * const desc, dma_addr_t ptr, \ type len, u32 options) \ { \ PRINT_POS; \ @@ -304,7 +308,7 @@ APPEND_CMD_PTR_LEN(seq_out_ptr, SEQ_OUT_PTR, u32) * from length of immediate data provided, e.g., split keys */ #define APPEND_CMD_PTR_TO_IMM2(cmd, op) \ -static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ +static inline void append_##cmd##_as_imm(u32 * const desc, void *data, \ unsigned int data_len, \ unsigned int len, u32 options) \ { \ @@ -315,7 +319,7 @@ static inline void append_##cmd##_as_imm(u32 *desc, void *data, \ APPEND_CMD_PTR_TO_IMM2(key, KEY); #define APPEND_CMD_RAW_IMM(cmd, op, type) \ -static inline void append_##cmd##_imm_##type(u32 *desc, type immediate, \ +static inline void append_##cmd##_imm_##type(u32 * const desc, type immediate, \ u32 options) \ { \ PRINT_POS; \ @@ -426,3 +430,64 @@ do { \ APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data) #define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \ APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data) + +/** + * struct alginfo - Container for algorithm details + * @algtype: algorithm selector; for valid values, see documentation of the + * functions where it is used. + * @keylen: length of the provided algorithm key, in bytes + * @keylen_pad: padded length of the provided algorithm key, in bytes + * @key: address where algorithm key resides; virtual address if key_inline + * is true, dma (bus) address if key_inline is false. + * @key_inline: true - key can be inlined in the descriptor; false - key is + * referenced by the descriptor + */ +struct alginfo { + u32 algtype; + unsigned int keylen; + unsigned int keylen_pad; + union { + dma_addr_t key_dma; + void *key_virt; + }; + bool key_inline; +}; + +/** + * desc_inline_query() - Provide indications on which data items can be inlined + * and which shall be referenced in a shared descriptor. 
+ * @sd_base_len: Shared descriptor base length - bytes consumed by the commands, + * excluding the data items to be inlined (or corresponding + * pointer if an item is not inlined). Each cnstr_* function that + * generates descriptors should have a define mentioning + * corresponding length. + * @jd_len: Maximum length of the job descriptor(s) that will be used + * together with the shared descriptor. + * @data_len: Array of lengths of the data items trying to be inlined + * @inl_mask: 32bit mask with bit x = 1 if data item x can be inlined, 0 + * otherwise. + * @count: Number of data items (size of @data_len array); must be <= 32 + * + * Return: 0 if data can be inlined / referenced, negative value if not. If 0, + * check @inl_mask for details. + */ +static inline int desc_inline_query(unsigned int sd_base_len, + unsigned int jd_len, unsigned int *data_len, + u32 *inl_mask, unsigned int count) +{ + int rem_bytes = (int)(CAAM_DESC_BYTES_MAX - sd_base_len - jd_len); + unsigned int i; + + *inl_mask = 0; + for (i = 0; (i < count) && (rem_bytes > 0); i++) { + if (rem_bytes - (int)(data_len[i] + + (count - i - 1) * CAAM_PTR_SZ) >= 0) { + rem_bytes -= data_len[i]; + *inl_mask |= (1 << i); + } else { + rem_bytes -= CAAM_PTR_SZ; + } + } + + return (rem_bytes >= 0) ? 0 : -1; +} diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 33e41ea..79a0cc7 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -146,10 +146,9 @@ static void report_ccb_status(struct device *jrdev, const u32 status, strlen(rng_err_id_list[err_id])) { /* RNG-only error */ err_str = rng_err_id_list[err_id]; - } else if (err_id < ARRAY_SIZE(err_id_list)) + } else { err_str = err_id_list[err_id]; - else - snprintf(err_err_code, sizeof(err_err_code), "%02x", err_id); + } /* * CCB ICV check failures are part of normal operation life; diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 5d4c050..e2bcacc 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -41,6 +41,7 @@ struct caam_drv_private_jr { struct device *dev; int ridx; struct caam_job_ring __iomem *rregs; /* JobR's register space */ + struct tasklet_struct irqtask; int irq; /* One per queue */ /* Number of scatterlist crypt transforms active on the JobR */ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 757c27f..c8604df 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -73,6 +73,8 @@ static int caam_jr_shutdown(struct device *dev) ret = caam_reset_hw_jr(dev); + tasklet_kill(&jrp->irqtask); + /* Release interrupt */ free_irq(jrp->irq, dev); @@ -128,7 +130,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* * Check the output ring for ready responses, kick - * the threaded irq if jobs done. + * tasklet if jobs done. 
*/ irqstate = rd_reg32(&jrp->rregs->jrintstatus); if (!irqstate) @@ -150,13 +152,18 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); - return IRQ_WAKE_THREAD; + preempt_disable(); + tasklet_schedule(&jrp->irqtask); + preempt_enable(); + + return IRQ_HANDLED; } -static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) +/* Deferred service handler, run as interrupt-fired tasklet */ +static void caam_jr_dequeue(unsigned long devarg) { int hw_idx, sw_idx, i, head, tail; - struct device *dev = st_dev; + struct device *dev = (struct device *)devarg; struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg); u32 *userdesc, userstatus; @@ -230,8 +237,6 @@ static irqreturn_t caam_jr_threadirq(int irq, void *st_dev) /* reenable / unmask IRQs */ clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); - - return IRQ_HANDLED; } /** @@ -389,10 +394,11 @@ static int caam_jr_init(struct device *dev) jrp = dev_get_drvdata(dev); + tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev); + /* Connect job ring interrupt handler. */ - error = request_threaded_irq(jrp->irq, caam_jr_interrupt, - caam_jr_threadirq, IRQF_SHARED, - dev_name(dev), dev); + error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED, + dev_name(dev), dev); if (error) { dev_err(dev, "can't connect JobR %d interrupt (%d)\n", jrp->ridx, jrp->irq); @@ -454,6 +460,7 @@ out_free_inpring: out_free_irq: free_irq(jrp->irq, dev); out_kill_deq: + tasklet_kill(&jrp->irqtask); return error; } @@ -489,7 +496,7 @@ static int caam_jr_probe(struct platform_device *pdev) return -ENOMEM; } - jrpriv->rregs = (struct caam_job_ring __force *)ctrl; + jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl; if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring")) diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c index e1eaf4f..1bb2816 100644 --- a/drivers/crypto/caam/key_gen.c +++ b/drivers/crypto/caam/key_gen.c @@ -10,6 +10,36 @@ #include "desc_constr.h" #include "key_gen.h" +/** + * split_key_len - Compute MDHA split key length for a given algorithm + * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1, + * SHA224, SHA384, SHA512. + * + * Return: MDHA split key length + */ +static inline u32 split_key_len(u32 hash) +{ + /* Sizes for MDHA pads (*not* keys): MD5, SHA1, 224, 256, 384, 512 */ + static const u8 mdpadlen[] = { 16, 20, 32, 32, 64, 64 }; + u32 idx; + + idx = (hash & OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT; + + return (u32)(mdpadlen[idx] * 2); +} + +/** + * split_key_pad_len - Compute MDHA split key pad length for a given algorithm + * @hash: Hashing algorithm selection, one of OP_ALG_ALGSEL_* - MD5, SHA1, + * SHA224, SHA384, SHA512. 
+ * + * Return: MDHA split key pad length + */ +static inline u32 split_key_pad_len(u32 hash) +{ + return ALIGN(split_key_len(hash), 16); +} + void split_key_done(struct device *dev, u32 *desc, u32 err, void *context) { @@ -41,15 +71,29 @@ Split key generation----------------------------------------------- [06] 0x64260028 fifostr: class2 mdsplit-jdk len=40 @0xffe04000 */ -int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, - int split_key_pad_len, const u8 *key_in, u32 keylen, - u32 alg_op) +int gen_split_key(struct device *jrdev, u8 *key_out, + struct alginfo * const adata, const u8 *key_in, u32 keylen, + int max_keylen) { u32 *desc; struct split_key_result result; dma_addr_t dma_addr_in, dma_addr_out; int ret = -ENOMEM; + adata->keylen = split_key_len(adata->algtype & OP_ALG_ALGSEL_MASK); + adata->keylen_pad = split_key_pad_len(adata->algtype & + OP_ALG_ALGSEL_MASK); + +#ifdef DEBUG + dev_err(jrdev, "split keylen %d split keylen padded %d\n", + adata->keylen, adata->keylen_pad); + print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1); +#endif + + if (adata->keylen_pad > max_keylen) + return -EINVAL; + desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA); if (!desc) { dev_err(jrdev, "unable to allocate key input memory\n"); @@ -63,7 +107,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, goto out_free; } - dma_addr_out = dma_map_single(jrdev, key_out, split_key_pad_len, + dma_addr_out = dma_map_single(jrdev, key_out, adata->keylen_pad, DMA_FROM_DEVICE); if (dma_mapping_error(jrdev, dma_addr_out)) { dev_err(jrdev, "unable to map key output memory\n"); @@ -74,7 +118,9 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, append_key(desc, dma_addr_in, keylen, CLASS_2 | KEY_DEST_CLASS_REG); /* Sets MDHA up into an HMAC-INIT */ - append_operation(desc, alg_op | OP_ALG_DECRYPT | OP_ALG_AS_INIT); + append_operation(desc, (adata->algtype & OP_ALG_ALGSEL_MASK) | + OP_ALG_AAI_HMAC | OP_TYPE_CLASS2_ALG | OP_ALG_DECRYPT | + OP_ALG_AS_INIT); /* * do a FIFO_LOAD of zero, this will trigger the internal key expansion @@ -87,7 +133,7 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, * FIFO_STORE with the explicit split-key content store * (0x26 output type) */ - append_fifo_store(desc, dma_addr_out, split_key_len, + append_fifo_store(desc, dma_addr_out, adata->keylen, LDST_CLASS_2_CCB | FIFOST_TYPE_SPLIT_KEK); #ifdef DEBUG @@ -108,11 +154,11 @@ int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, #ifdef DEBUG print_hex_dump(KERN_ERR, "ctx.key@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, key_out, - split_key_pad_len, 1); + adata->keylen_pad, 1); #endif } - dma_unmap_single(jrdev, dma_addr_out, split_key_pad_len, + dma_unmap_single(jrdev, dma_addr_out, adata->keylen_pad, DMA_FROM_DEVICE); out_unmap_in: dma_unmap_single(jrdev, dma_addr_in, keylen, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/key_gen.h b/drivers/crypto/caam/key_gen.h index c5588f6..4628f38 100644 --- a/drivers/crypto/caam/key_gen.h +++ b/drivers/crypto/caam/key_gen.h @@ -12,6 +12,6 @@ struct split_key_result { void split_key_done(struct device *dev, u32 *desc, u32 err, void *context); -int gen_split_key(struct device *jrdev, u8 *key_out, int split_key_len, - int split_key_pad_len, const u8 *key_in, u32 keylen, - u32 alg_op); +int gen_split_key(struct device *jrdev, u8 *key_out, + struct alginfo * const adata, const u8 *key_in, u32 keylen, + 
int max_keylen); diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 41cd5a3..6afa20c 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -7,7 +7,11 @@ #include "regs.h" -struct sec4_sg_entry; +struct sec4_sg_entry { + u64 ptr; + u32 len; + u32 bpid_offset; +}; /* * convert single dma address to h/w link table format diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 8d2dbac..7bc0998 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -404,10 +404,6 @@ static int ccp_init(struct ccp_device *ccp) goto e_pool; } - /* Initialize the queues used to wait for KSB space and suspend */ - init_waitqueue_head(&ccp->sb_queue); - init_waitqueue_head(&ccp->suspend_queue); - dev_dbg(dev, "Starting threads...\n"); /* Create a kthread for each queue */ for (i = 0; i < ccp->cmd_q_count; i++) { diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c index faf3cb3..e2ce819 100644 --- a/drivers/crypto/ccp/ccp-dev-v5.c +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -21,6 +21,12 @@ #include "ccp-dev.h" +/* Allocate the requested number of contiguous LSB slots + * from the LSB bitmap. Look in the private range for this + * queue first; failing that, check the public area. + * If no space is available, wait around. + * Return: first slot number + */ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) { struct ccp_device *ccp; @@ -50,7 +56,7 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) bitmap_set(ccp->lsbmap, start, count); mutex_unlock(&ccp->sb_mutex); - return start * LSB_ITEM_SIZE; + return start; } ccp->sb_avail = 0; @@ -63,17 +69,18 @@ static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) } } +/* Free a number of LSB slots from the bitmap, starting at + * the indicated starting slot number. 
+ */ static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start, unsigned int count) { - int lsbno = start / LSB_SIZE; - if (!start) return; - if (cmd_q->lsb == lsbno) { + if (cmd_q->lsb == start) { /* An entry from the private LSB */ - bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count); + bitmap_clear(cmd_q->lsbmap, start, count); } else { /* From the shared LSBs */ struct ccp_device *ccp = cmd_q->ccp; @@ -396,7 +403,7 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_PROT(&desc) = 0; function.raw = 0; - CCP_RSA_SIZE(&function) = op->u.rsa.mod_size; + CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3; CCP5_CMD_FUNCTION(&desc) = function.raw; CCP5_CMD_LEN(&desc) = op->u.rsa.input_len; @@ -411,10 +418,10 @@ static int ccp5_perform_rsa(struct ccp_op *op) CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; - /* Key (Exponent) is in external memory */ - CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma); - CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma); - CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + /* Exponent is in LSB memory */ + CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE; + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; return ccp5_do_cmd(&desc, op->cmd_q); } @@ -751,9 +758,6 @@ static int ccp5_init(struct ccp_device *ccp) goto e_pool; } - /* Initialize the queue used to suspend */ - init_waitqueue_head(&ccp->suspend_queue); - dev_dbg(dev, "Loading LSB map...\n"); /* Copy the private LSB mask to the public registers */ status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index cafa633..511ab04 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -41,7 +41,7 @@ struct ccp_tasklet_data { }; /* Human-readable error strings */ -char *ccp_error_codes[] = { +static char *ccp_error_codes[] = { "", "ERR 01: ILLEGAL_ENGINE", "ERR 02: ILLEGAL_KEY_ID", @@ -478,6 +478,10 @@ struct ccp_device *ccp_alloc_struct(struct device *dev) ccp->sb_count = KSB_COUNT; ccp->sb_start = 0; + /* Initialize the wait queues */ + init_waitqueue_head(&ccp->sb_queue); + init_waitqueue_head(&ccp->suspend_queue); + ccp->ord = ccp_increment_unit_ordinal(); snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", ccp->ord); snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", ccp->ord); diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h index da5f4a6..830f35e 100644 --- a/drivers/crypto/ccp/ccp-dev.h +++ b/drivers/crypto/ccp/ccp-dev.h @@ -278,7 +278,7 @@ struct ccp_cmd_queue { /* Private LSB that is assigned to this queue, or -1 if none. 
* Bitmap for my private LSB, unused otherwise */ - unsigned int lsb; + int lsb; DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE); /* Queue processing thread */ @@ -515,7 +515,6 @@ struct ccp_op { struct ccp_passthru_op passthru; struct ccp_ecc_op ecc; } u; - struct ccp_mem key; }; static inline u32 ccp_addr_lo(struct ccp_dma_info *info) @@ -541,23 +540,23 @@ static inline u32 ccp_addr_hi(struct ccp_dma_info *info) * word 7: upper 16 bits of key pointer; key memory type */ struct dword0 { - __le32 soc:1; - __le32 ioc:1; - __le32 rsvd1:1; - __le32 init:1; - __le32 eom:1; /* AES/SHA only */ - __le32 function:15; - __le32 engine:4; - __le32 prot:1; - __le32 rsvd2:7; + unsigned int soc:1; + unsigned int ioc:1; + unsigned int rsvd1:1; + unsigned int init:1; + unsigned int eom:1; /* AES/SHA only */ + unsigned int function:15; + unsigned int engine:4; + unsigned int prot:1; + unsigned int rsvd2:7; }; struct dword3 { - __le32 src_hi:16; - __le32 src_mem:2; - __le32 lsb_cxt_id:8; - __le32 rsvd1:5; - __le32 fixed:1; + unsigned int src_hi:16; + unsigned int src_mem:2; + unsigned int lsb_cxt_id:8; + unsigned int rsvd1:5; + unsigned int fixed:1; }; union dword4 { @@ -567,18 +566,18 @@ union dword4 { union dword5 { struct { - __le32 dst_hi:16; - __le32 dst_mem:2; - __le32 rsvd1:13; - __le32 fixed:1; + unsigned int dst_hi:16; + unsigned int dst_mem:2; + unsigned int rsvd1:13; + unsigned int fixed:1; } fields; __le32 sha_len_hi; }; struct dword7 { - __le32 key_hi:16; - __le32 key_mem:2; - __le32 rsvd1:14; + unsigned int key_hi:16; + unsigned int key_mem:2; + unsigned int rsvd1:14; }; struct ccp5_desc { diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index 4ce67fb..3e104f5 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_CHELSIO select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 + select CRYPTO_AUTHENC ---help--- The Chelsio Crypto Co-processor driver for T6 adapters. 
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 56b1538..2ed1e24 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -54,6 +54,12 @@ #include <crypto/algapi.h> #include <crypto/hash.h> #include <crypto/sha.h> +#include <crypto/authenc.h> +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/internal/skcipher.h> +#include <crypto/aead.h> +#include <crypto/scatterwalk.h> #include <crypto/internal/hash.h> #include "t4fw_api.h" @@ -62,6 +68,11 @@ #include "chcr_algo.h" #include "chcr_crypto.h" +static inline struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->aeadctx; +} + static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) { return ctx->crypto_ctx->ablkctx; @@ -72,6 +83,16 @@ static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) return ctx->crypto_ctx->hmacctx; } +static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx) +{ + return gctx->ctx->gcm; +} + +static inline struct chcr_authenc_ctx *AUTHENC_CTX(struct chcr_aead_ctx *gctx) +{ + return gctx->ctx->authenc; +} + static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx) { return ctx->dev->u_ctx; @@ -94,12 +115,37 @@ static inline unsigned int sgl_len(unsigned int n) return (3 * n) / 2 + (n & 1) + 2; } +static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err) +{ + u8 temp[SHA512_DIGEST_SIZE]; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + int authsize = crypto_aead_authsize(tfm); + struct cpl_fw6_pld *fw6_pld; + int cmp = 0; + + fw6_pld = (struct cpl_fw6_pld *)input; + if ((get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) || + (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_GCM)) { + cmp = memcmp(&fw6_pld->data[2], (fw6_pld + 1), authsize); + } else { + + sg_pcopy_to_buffer(req->src, sg_nents(req->src), temp, + authsize, req->assoclen + + req->cryptlen - authsize); + cmp = memcmp(temp, (fw6_pld + 1), authsize); + } + if (cmp) + *err = -EBADMSG; + else + *err = 0; +} + /* * chcr_handle_resp - Unmap the DMA buffers associated with the request * @req: crypto request */ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, - int error_status) + int err) { struct crypto_tfm *tfm = req->tfm; struct chcr_context *ctx = crypto_tfm_ctx(tfm); @@ -109,17 +155,33 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, unsigned int digestsize, updated_digestsize; switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AEAD: + ctx_req.req.aead_req = (struct aead_request *)req; + ctx_req.ctx.reqctx = aead_request_ctx(ctx_req.req.aead_req); + dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.aead_req->dst, + ctx_req.ctx.reqctx->dst_nents, DMA_FROM_DEVICE); + if (ctx_req.ctx.reqctx->skb) { + kfree_skb(ctx_req.ctx.reqctx->skb); + ctx_req.ctx.reqctx->skb = NULL; + } + if (ctx_req.ctx.reqctx->verify == VERIFY_SW) { + chcr_verify_tag(ctx_req.req.aead_req, input, + &err); + ctx_req.ctx.reqctx->verify = VERIFY_HW; + } + break; + case CRYPTO_ALG_TYPE_BLKCIPHER: ctx_req.req.ablk_req = (struct ablkcipher_request *)req; ctx_req.ctx.ablk_ctx = ablkcipher_request_ctx(ctx_req.req.ablk_req); - if (!error_status) { + if (!err) { fw6_pld = (struct cpl_fw6_pld *)input; memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2], AES_BLOCK_SIZE); } dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst, - ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE); + 
ctx_req.ctx.ablk_ctx->dst_nents, DMA_FROM_DEVICE); if (ctx_req.ctx.ablk_ctx->skb) { kfree_skb(ctx_req.ctx.ablk_ctx->skb); ctx_req.ctx.ablk_ctx->skb = NULL; @@ -138,8 +200,10 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, updated_digestsize = SHA256_DIGEST_SIZE; else if (digestsize == SHA384_DIGEST_SIZE) updated_digestsize = SHA512_DIGEST_SIZE; - if (ctx_req.ctx.ahash_ctx->skb) + if (ctx_req.ctx.ahash_ctx->skb) { + kfree_skb(ctx_req.ctx.ahash_ctx->skb); ctx_req.ctx.ahash_ctx->skb = NULL; + } if (ctx_req.ctx.ahash_ctx->result == 1) { ctx_req.ctx.ahash_ctx->result = 0; memcpy(ctx_req.req.ahash_req->result, input + @@ -150,11 +214,9 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, sizeof(struct cpl_fw6_pld), updated_digestsize); } - kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr); - ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL; break; } - return 0; + return err; } /* @@ -178,40 +240,81 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) return flits + sgl_len(cnt); } -static struct shash_desc *chcr_alloc_shash(unsigned int ds) +static inline void get_aes_decrypt_key(unsigned char *dec_key, + const unsigned char *key, + unsigned int keylength) +{ + u32 temp; + u32 w_ring[MAX_NK]; + int i, j, k; + u8 nr, nk; + + switch (keylength) { + case AES_KEYLENGTH_128BIT: + nk = KEYLENGTH_4BYTES; + nr = NUMBER_OF_ROUNDS_10; + break; + case AES_KEYLENGTH_192BIT: + nk = KEYLENGTH_6BYTES; + nr = NUMBER_OF_ROUNDS_12; + break; + case AES_KEYLENGTH_256BIT: + nk = KEYLENGTH_8BYTES; + nr = NUMBER_OF_ROUNDS_14; + break; + default: + return; + } + for (i = 0; i < nk; i++) + w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); + + i = 0; + temp = w_ring[nk - 1]; + while (i + nk < (nr + 1) * 4) { + if (!(i % nk)) { + /* RotWord(temp) */ + temp = (temp << 8) | (temp >> 24); + temp = aes_ks_subword(temp); + temp ^= round_constant[i / nk]; + } else if (nk == 8 && (i % 4 == 0)) { + temp = aes_ks_subword(temp); + } + w_ring[i % nk] ^= temp; + temp = w_ring[i % nk]; + i++; + } + i--; + for (k = 0, j = i % nk; k < nk; k++) { + *((u32 *)dec_key + k) = htonl(w_ring[j]); + j--; + if (j < 0) + j += nk; + } +} + +static struct crypto_shash *chcr_alloc_shash(unsigned int ds) { struct crypto_shash *base_hash = NULL; - struct shash_desc *desc; switch (ds) { case SHA1_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha1-generic", 0, 0); + base_hash = crypto_alloc_shash("sha1", 0, 0); break; case SHA224_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha224-generic", 0, 0); + base_hash = crypto_alloc_shash("sha224", 0, 0); break; case SHA256_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha256-generic", 0, 0); + base_hash = crypto_alloc_shash("sha256", 0, 0); break; case SHA384_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha384-generic", 0, 0); + base_hash = crypto_alloc_shash("sha384", 0, 0); break; case SHA512_DIGEST_SIZE: - base_hash = crypto_alloc_shash("sha512-generic", 0, 0); + base_hash = crypto_alloc_shash("sha512", 0, 0); break; } - if (IS_ERR(base_hash)) { - pr_err("Can not allocate sha-generic algo.\n"); - return (void *)base_hash; - } - desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash), - GFP_KERNEL); - if (!desc) - return ERR_PTR(-ENOMEM); - desc->tfm = base_hash; - desc->flags = crypto_shash_get_flags(base_hash); - return desc; + return base_hash; } static int chcr_compute_partial_hash(struct shash_desc *desc, @@ -279,31 +382,18 @@ static inline int is_hmac(struct crypto_tfm *tfm) struct chcr_alg_template *chcr_crypto_alg 
= container_of(__crypto_ahash_alg(alg), struct chcr_alg_template, alg.hash); - if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) == - CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + if (chcr_crypto_alg->type == CRYPTO_ALG_TYPE_HMAC) return 1; return 0; } -static inline unsigned int ch_nents(struct scatterlist *sg, - unsigned int *total_size) -{ - unsigned int nents; - - for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) { - nents++; - *total_size += sg->length; - } - return nents; -} - static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, struct scatterlist *sg, struct phys_sge_parm *sg_param) { struct phys_sge_pairs *to; - unsigned int out_buf_size = sg_param->obsize; - unsigned int nents = sg_param->nents, i, j, tot_len = 0; + int out_buf_size = sg_param->obsize; + unsigned int nents = sg_param->nents, i, j = 0; phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL) | CPL_RX_PHYS_DSGL_ISRDMA_V(0)); @@ -321,25 +411,24 @@ static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, sizeof(struct cpl_rx_phys_dsgl)); for (i = 0; nents; to++) { - for (j = i; (nents && (j < (8 + i))); j++, nents--) { - to->len[j] = htons(sg->length); + for (j = 0; j < 8 && nents; j++, nents--) { + out_buf_size -= sg_dma_len(sg); + to->len[j] = htons(sg_dma_len(sg)); to->addr[j] = cpu_to_be64(sg_dma_address(sg)); - if (out_buf_size) { - if (tot_len + sg_dma_len(sg) >= out_buf_size) { - to->len[j] = htons(out_buf_size - - tot_len); - return; - } - tot_len += sg_dma_len(sg); - } sg = sg_next(sg); } } + if (out_buf_size) { + j--; + to--; + to->len[j] = htons(ntohs(to->len[j]) + (out_buf_size)); + } } -static inline unsigned -int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, - struct scatterlist *sg, struct phys_sge_parm *sg_param) +static inline int map_writesg_phys_cpl(struct device *dev, + struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, + struct phys_sge_parm *sg_param) { if (!sg || !sg_param->nents) return 0; @@ -353,6 +442,14 @@ int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, return 0; } +static inline int get_aead_subtype(struct crypto_aead *aead) +{ + struct aead_alg *alg = crypto_aead_alg(aead); + struct chcr_alg_template *chcr_crypto_alg = + container_of(alg, struct chcr_alg_template, alg.aead); + return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; +} + static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) { struct crypto_alg *alg = tfm->__crt_alg; @@ -362,8 +459,23 @@ static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; } +static inline void write_buffer_to_skb(struct sk_buff *skb, + unsigned int *frags, + char *bfr, + u8 bfr_len) +{ + skb->len += bfr_len; + skb->data_len += bfr_len; + skb->truesize += bfr_len; + get_page(virt_to_page(bfr)); + skb_fill_page_desc(skb, *frags, virt_to_page(bfr), + offset_in_page(bfr), bfr_len); + (*frags)++; +} + + static inline void -write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, +write_sg_to_skb(struct sk_buff *skb, unsigned int *frags, struct scatterlist *sg, unsigned int count) { struct page *spage; @@ -372,8 +484,9 @@ write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, skb->len += count; skb->data_len += count; skb->truesize += count; + while (count > 0) { - if (sg && (!(sg->length))) + if (!sg || (!(sg->length))) break; spage = sg_page(sg); get_page(spage); @@ -389,29 +502,25 @@ static int generate_copy_rrkey(struct ablk_ctx *ablkctx, struct _key_ctx *key_ctx) 
{ if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { - get_aes_decrypt_key(key_ctx->key, ablkctx->key, - ablkctx->enckey_len << 3); - memset(key_ctx->key + ablkctx->enckey_len, 0, - CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len); + memcpy(key_ctx->key, ablkctx->rrkey, ablkctx->enckey_len); } else { memcpy(key_ctx->key, ablkctx->key + (ablkctx->enckey_len >> 1), ablkctx->enckey_len >> 1); - get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1), - ablkctx->key, ablkctx->enckey_len << 2); + memcpy(key_ctx->key + (ablkctx->enckey_len >> 1), + ablkctx->rrkey, ablkctx->enckey_len >> 1); } return 0; } static inline void create_wreq(struct chcr_context *ctx, - struct fw_crypto_lookaside_wr *wreq, + struct chcr_wr *chcr_req, void *req, struct sk_buff *skb, int kctx_len, int hash_sz, - unsigned int phys_dsgl) + int is_iv, + unsigned int sc_len) { struct uld_ctx *u_ctx = ULD_CTX(ctx); - struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1); - struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1); int iv_loc = IV_DSGL; int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id]; unsigned int immdatalen = 0, nr_frags = 0; @@ -423,27 +532,27 @@ static inline void create_wreq(struct chcr_context *ctx, nr_frags = skb_shinfo(skb)->nr_frags; } - wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, - (kctx_len >> 4)); - wreq->pld_size_hash_size = + chcr_req->wreq.op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, + ((sizeof(chcr_req->key_ctx) + kctx_len) >> 4)); + chcr_req->wreq.pld_size_hash_size = htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) | FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz)); - wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( + chcr_req->wreq.len16_pkd = + htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( (calc_tx_flits_ofld(skb) * 8), 16))); - wreq->cookie = cpu_to_be64((uintptr_t)req); - wreq->rx_chid_to_rx_q_id = + chcr_req->wreq.cookie = cpu_to_be64((uintptr_t)req); + chcr_req->wreq.rx_chid_to_rx_q_id = FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, - (hash_sz) ? IV_NOP : iv_loc); + is_iv ? iv_loc : IV_NOP); - ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); - ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), - 16) - ((sizeof(*wreq)) >> 4))); + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); + chcr_req->ulptx.len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), + 16) - ((sizeof(chcr_req->wreq)) >> 4))); - sc_imm->cmd_more = FILL_CMD_MORE(immdatalen); - sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len + - ((hash_sz) ? 
DUMMY_BYTES : - (sizeof(struct cpl_rx_phys_dsgl) + - phys_dsgl)) + immdatalen); + chcr_req->sc_imm.cmd_more = FILL_CMD_MORE(immdatalen); + chcr_req->sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + + sizeof(chcr_req->key_ctx) + + kctx_len + sc_len + immdatalen); } /** @@ -454,86 +563,83 @@ static inline void create_wreq(struct chcr_context *ctx, * @op_type: encryption or decryption */ static struct sk_buff -*create_cipher_wr(struct crypto_async_request *req_base, - struct chcr_context *ctx, unsigned short qid, +*create_cipher_wr(struct ablkcipher_request *req, + unsigned short qid, unsigned short op_type) { - struct ablkcipher_request *req = (struct ablkcipher_request *)req_base; struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); struct sk_buff *skb = NULL; - struct _key_ctx *key_ctx; - struct fw_crypto_lookaside_wr *wreq; - struct cpl_tx_sec_pdu *sec_cpl; + struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; - struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req); + struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); struct phys_sge_parm sg_param; - unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0; + unsigned int frags = 0, transhdr_len, phys_dsgl; unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; if (!req->info) return ERR_PTR(-EINVAL); - ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize); - ablkctx->enc = op_type; - + reqctx->dst_nents = sg_nents_for_len(req->dst, req->nbytes); + if (reqctx->dst_nents <= 0) { + pr_err("AES:Invalid Destination sg lists\n"); + return ERR_PTR(-EINVAL); + } if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || - (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) + (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) { + pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n", + ablkctx->enckey_len, req->nbytes, ivsize); return ERR_PTR(-EINVAL); + } - phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents); + phys_dsgl = get_space_for_phys_dsgl(reqctx->dst_nents); - kctx_len = sizeof(*key_ctx) + - (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); + kctx_len = (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl); - skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), - GFP_ATOMIC); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); if (!skb) return ERR_PTR(-ENOMEM); skb_reserve(skb, sizeof(struct sge_opaque_hdr)); - wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); - - sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); - sec_cpl->op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1); - - sec_cpl->pldlen = htonl(ivsize + req->nbytes); - sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, - ivsize + 1, 0); - - sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 0, - 0, 0); - sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + chcr_req->sec_cpl.op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1); + + chcr_req->sec_cpl.pldlen = htonl(ivsize + req->nbytes); + chcr_req->sec_cpl.aadstart_cipherstop_hi = + FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, ivsize + 
1, 0); + + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0); + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, ablkctx->ciph_mode, - 0, 0, ivsize >> 1, 1); - sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, + 0, 0, ivsize >> 1); + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, 0, 1, phys_dsgl); - key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); - key_ctx->ctx_hdr = ablkctx->key_ctx_hdr; + chcr_req->key_ctx.ctx_hdr = ablkctx->key_ctx_hdr; if (op_type == CHCR_DECRYPT_OP) { - if (generate_copy_rrkey(ablkctx, key_ctx)) - goto map_fail1; + generate_copy_rrkey(ablkctx, &chcr_req->key_ctx); } else { if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { - memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len); + memcpy(chcr_req->key_ctx.key, ablkctx->key, + ablkctx->enckey_len); } else { - memcpy(key_ctx->key, ablkctx->key + + memcpy(chcr_req->key_ctx.key, ablkctx->key + (ablkctx->enckey_len >> 1), ablkctx->enckey_len >> 1); - memcpy(key_ctx->key + + memcpy(chcr_req->key_ctx.key + (ablkctx->enckey_len >> 1), ablkctx->key, ablkctx->enckey_len >> 1); } } - phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len); - - memcpy(ablkctx->iv, req->info, ivsize); - sg_init_table(&ablkctx->iv_sg, 1); - sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize); - sg_param.nents = ablkctx->dst_nents; - sg_param.obsize = dst_bufsize; + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->nbytes; sg_param.qid = qid; sg_param.align = 1; if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst, @@ -541,10 +647,12 @@ static struct sk_buff goto map_fail1; skb_set_transport_header(skb, transhdr_len); - write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize); - write_sg_data_page_desc(skb, &frags, req->src, req->nbytes); - create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl); - req_ctx->skb = skb; + memcpy(reqctx->iv, req->info, ivsize); + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + write_sg_to_skb(skb, &frags, req->src, req->nbytes); + create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1, + sizeof(struct cpl_rx_phys_dsgl) + phys_dsgl); + reqctx->skb = skb; skb_get(skb); return skb; map_fail1: @@ -557,15 +665,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, { struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); - struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm); unsigned int ck_size, context_size; u16 alignment = 0; - if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize)) - goto badkey_err; - - memcpy(ablkctx->key, key, keylen); - ablkctx->enckey_len = keylen; if (keylen == AES_KEYSIZE_128) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; } else if (keylen == AES_KEYSIZE_192) { @@ -576,7 +678,9 @@ static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, } else { goto badkey_err; } - + memcpy(ablkctx->key, key, keylen); + ablkctx->enckey_len = keylen; + get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, keylen << 3); context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + keylen + alignment) >> 4; @@ -612,7 +716,6 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_async_request *req_base = &req->base; struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff 
*skb; @@ -622,8 +725,7 @@ static int chcr_aes_encrypt(struct ablkcipher_request *req) return -EBUSY; } - skb = create_cipher_wr(req_base, ctx, - u_ctx->lldi.rxq_ids[ctx->tx_channel_id], + skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id], CHCR_ENCRYPT_OP); if (IS_ERR(skb)) { pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); @@ -639,7 +741,6 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req) { struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); - struct crypto_async_request *req_base = &req->base; struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; @@ -649,7 +750,7 @@ static int chcr_aes_decrypt(struct ablkcipher_request *req) return -EBUSY; } - skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0], + skb = create_cipher_wr(req, u_ctx->lldi.rxq_ids[0], CHCR_DECRYPT_OP); if (IS_ERR(skb)) { pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); @@ -729,50 +830,33 @@ static int get_alg_config(struct algo_param *params, return 0; } -static inline int -write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx, - struct sk_buff *skb, unsigned int *frags, char *bfr, - u8 bfr_len) +static inline void chcr_free_shash(struct crypto_shash *base_hash) { - void *page_ptr = NULL; - - skb->len += bfr_len; - skb->data_len += bfr_len; - skb->truesize += bfr_len; - page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA); - if (!page_ptr) - return -ENOMEM; - get_page(virt_to_page(page_ptr)); - req_ctx->dummy_payload_ptr = page_ptr; - memcpy(page_ptr, bfr, bfr_len); - skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr), - offset_in_page(page_ptr), bfr_len); - (*frags)++; - return 0; + crypto_free_shash(base_hash); } /** - * create_final_hash_wr - Create hash work request + * create_hash_wr - Create hash work request * @req - Cipher req base */ -static struct sk_buff *create_final_hash_wr(struct ahash_request *req, - struct hash_wr_param *param) +static struct sk_buff *create_hash_wr(struct ahash_request *req, + struct hash_wr_param *param) { struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); struct hmac_ctx *hmacctx = HMAC_CTX(ctx); struct sk_buff *skb = NULL; - struct _key_ctx *key_ctx; - struct fw_crypto_lookaside_wr *wreq; - struct cpl_tx_sec_pdu *sec_cpl; + struct chcr_wr *chcr_req; unsigned int frags = 0, transhdr_len, iopad_alignment = 0; unsigned int digestsize = crypto_ahash_digestsize(tfm); - unsigned int kctx_len = sizeof(*key_ctx); + unsigned int kctx_len = 0; u8 hash_size_in_response = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : + GFP_ATOMIC; iopad_alignment = KEYCTX_ALIGN_PAD(digestsize); - kctx_len += param->alg_prm.result_size + iopad_alignment; + kctx_len = param->alg_prm.result_size + iopad_alignment; if (param->opad_needed) kctx_len += param->alg_prm.result_size + iopad_alignment; @@ -781,54 +865,54 @@ static struct sk_buff *create_final_hash_wr(struct ahash_request *req, else hash_size_in_response = param->alg_prm.result_size; transhdr_len = HASH_TRANSHDR_SIZE(kctx_len); - skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), - GFP_ATOMIC); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); if (!skb) return skb; skb_reserve(skb, sizeof(struct sge_opaque_hdr)); - wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); - memset(wreq, 0, transhdr_len); + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); - sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); - sec_cpl->op_ivinsrtofst = - FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0); - sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len); + chcr_req->sec_cpl.op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0); + chcr_req->sec_cpl.pldlen = htonl(param->bfr_len + param->sg_len); - sec_cpl->aadstart_cipherstop_hi = + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0); - sec_cpl->cipherstop_lo_authinsert = + chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0); - sec_cpl->seqno_numivs = + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode, - param->opad_needed, 0, 0); + param->opad_needed, 0); - sec_cpl->ivgen_hdrlen = + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0); - key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); - memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size); + memcpy(chcr_req->key_ctx.key, req_ctx->partial_hash, + param->alg_prm.result_size); if (param->opad_needed) - memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 : - CHCR_HASH_MAX_DIGEST_SIZE), + memcpy(chcr_req->key_ctx.key + + ((param->alg_prm.result_size <= 32) ? 
32 : + CHCR_HASH_MAX_DIGEST_SIZE), hmacctx->opad, param->alg_prm.result_size); - key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, + chcr_req->key_ctx.ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, param->alg_prm.mk_size, 0, param->opad_needed, - (kctx_len >> 4)); - sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1); + ((kctx_len + + sizeof(chcr_req->key_ctx)) >> 4)); + chcr_req->sec_cpl.scmd1 = cpu_to_be64((u64)param->scmd1); skb_set_transport_header(skb, transhdr_len); if (param->bfr_len != 0) - write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr, - param->bfr_len); + write_buffer_to_skb(skb, &frags, req_ctx->reqbfr, + param->bfr_len); if (param->sg_len != 0) - write_sg_data_page_desc(skb, &frags, req->src, param->sg_len); + write_sg_to_skb(skb, &frags, req->src, param->sg_len); - create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response, - 0); + create_wreq(ctx, chcr_req, req, skb, kctx_len, hash_size_in_response, 0, + DUMMY_BYTES); req_ctx->skb = skb; skb_get(skb); return skb; @@ -854,34 +938,40 @@ static int chcr_ahash_update(struct ahash_request *req) return -EBUSY; } - if (nbytes + req_ctx->bfr_len >= bs) { - remainder = (nbytes + req_ctx->bfr_len) % bs; - nbytes = nbytes + req_ctx->bfr_len - remainder; + if (nbytes + req_ctx->reqlen >= bs) { + remainder = (nbytes + req_ctx->reqlen) % bs; + nbytes = nbytes + req_ctx->reqlen - remainder; } else { - sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr + - req_ctx->bfr_len, nbytes, 0); - req_ctx->bfr_len += nbytes; + sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->reqbfr + + req_ctx->reqlen, nbytes, 0); + req_ctx->reqlen += nbytes; return 0; } params.opad_needed = 0; params.more = 1; params.last = 0; - params.sg_len = nbytes - req_ctx->bfr_len; - params.bfr_len = req_ctx->bfr_len; + params.sg_len = nbytes - req_ctx->reqlen; + params.bfr_len = req_ctx->reqlen; params.scmd1 = 0; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->result = 0; req_ctx->data_len += params.sg_len + params.bfr_len; - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; - req_ctx->bfr_len = remainder; - if (remainder) + if (remainder) { + u8 *temp; + /* Swap buffers */ + temp = req_ctx->reqbfr; + req_ctx->reqbfr = req_ctx->skbfr; + req_ctx->skbfr = temp; sg_pcopy_to_buffer(req->src, sg_nents(req->src), - req_ctx->bfr, remainder, req->nbytes - + req_ctx->reqbfr, remainder, req->nbytes - remainder); + } + req_ctx->reqlen = remainder; skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -917,10 +1007,10 @@ static int chcr_ahash_final(struct ahash_request *req) params.sg_len = 0; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->result = 1; - params.bfr_len = req_ctx->bfr_len; + params.bfr_len = req_ctx->reqlen; req_ctx->data_len += params.bfr_len + params.sg_len; - if (req_ctx->bfr && (req_ctx->bfr_len == 0)) { - create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + if (req_ctx->reqlen == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len); params.last = 0; params.more = 1; params.scmd1 = 0; @@ -931,7 +1021,10 @@ static int chcr_ahash_final(struct ahash_request *req) params.last = 1; params.more = 0; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -963,12 +1056,12 @@ static int 
chcr_ahash_finup(struct ahash_request *req) params.opad_needed = 0; params.sg_len = req->nbytes; - params.bfr_len = req_ctx->bfr_len; + params.bfr_len = req_ctx->reqlen; get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); req_ctx->data_len += params.bfr_len + params.sg_len; req_ctx->result = 1; - if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) { - create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + if ((req_ctx->reqlen + req->nbytes) == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len); params.last = 0; params.more = 1; params.scmd1 = 0; @@ -979,9 +1072,10 @@ static int chcr_ahash_finup(struct ahash_request *req) params.more = 0; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); chcr_send_wr(skb); @@ -1023,13 +1117,13 @@ static int chcr_ahash_digest(struct ahash_request *req) req_ctx->result = 1; req_ctx->data_len += params.bfr_len + params.sg_len; - if (req_ctx->bfr && req->nbytes == 0) { - create_last_hash_block(req_ctx->bfr, bs, 0); + if (req->nbytes == 0) { + create_last_hash_block(req_ctx->reqbfr, bs, 0); params.more = 1; params.bfr_len = bs; } - skb = create_final_hash_wr(req, ¶ms); + skb = create_hash_wr(req, ¶ms); if (!skb) return -ENOMEM; @@ -1044,12 +1138,12 @@ static int chcr_ahash_export(struct ahash_request *areq, void *out) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct chcr_ahash_req_ctx *state = out; - state->bfr_len = req_ctx->bfr_len; + state->reqlen = req_ctx->reqlen; state->data_len = req_ctx->data_len; - memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(state->bfr1, req_ctx->reqbfr, req_ctx->reqlen); memcpy(state->partial_hash, req_ctx->partial_hash, CHCR_HASH_MAX_DIGEST_SIZE); - return 0; + return 0; } static int chcr_ahash_import(struct ahash_request *areq, const void *in) @@ -1057,10 +1151,11 @@ static int chcr_ahash_import(struct ahash_request *areq, const void *in) struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in; - req_ctx->bfr_len = state->bfr_len; + req_ctx->reqlen = state->reqlen; req_ctx->data_len = state->data_len; - req_ctx->dummy_payload_ptr = NULL; - memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + req_ctx->reqbfr = req_ctx->bfr1; + req_ctx->skbfr = req_ctx->bfr2; + memcpy(req_ctx->bfr1, state->bfr1, CHCR_HASH_MAX_BLOCK_SIZE_128); memcpy(req_ctx->partial_hash, state->partial_hash, CHCR_HASH_MAX_DIGEST_SIZE); return 0; @@ -1075,15 +1170,16 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); unsigned int i, err = 0, updated_digestsize; - /* - * use the key to calculate the ipad and opad. ipad will sent with the + SHASH_DESC_ON_STACK(shash, hmacctx->base_hash); + + /* use the key to calculate the ipad and opad. ipad will sent with the * first request's data. 
opad will be sent with the final hash result * ipad in hmacctx->ipad and opad in hmacctx->opad location */ - if (!hmacctx->desc) - return -EINVAL; + shash->tfm = hmacctx->base_hash; + shash->flags = crypto_shash_get_flags(hmacctx->base_hash); if (keylen > bs) { - err = crypto_shash_digest(hmacctx->desc, key, keylen, + err = crypto_shash_digest(shash, key, keylen, hmacctx->ipad); if (err) goto out; @@ -1104,13 +1200,13 @@ static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, updated_digestsize = SHA256_DIGEST_SIZE; else if (digestsize == SHA384_DIGEST_SIZE) updated_digestsize = SHA512_DIGEST_SIZE; - err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad, + err = chcr_compute_partial_hash(shash, hmacctx->ipad, hmacctx->ipad, digestsize); if (err) goto out; chcr_change_order(hmacctx->ipad, updated_digestsize); - err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad, + err = chcr_compute_partial_hash(shash, hmacctx->opad, hmacctx->opad, digestsize); if (err) goto out; @@ -1124,28 +1220,29 @@ static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, { struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); struct ablk_ctx *ablkctx = ABLK_CTX(ctx); - int status = 0; unsigned short context_size = 0; - if ((key_len == (AES_KEYSIZE_128 << 1)) || - (key_len == (AES_KEYSIZE_256 << 1))) { - memcpy(ablkctx->key, key, key_len); - ablkctx->enckey_len = key_len; - context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; - ablkctx->key_ctx_hdr = - FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? - CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : - CHCR_KEYCTX_CIPHER_KEY_SIZE_256, - CHCR_KEYCTX_NO_KEY, 1, - 0, context_size); - ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; - } else { + if ((key_len != (AES_KEYSIZE_128 << 1)) && + (key_len != (AES_KEYSIZE_256 << 1))) { crypto_tfm_set_flags((struct crypto_tfm *)tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); ablkctx->enckey_len = 0; - status = -EINVAL; + return -EINVAL; + } - return status; + + memcpy(ablkctx->key, key, key_len); + ablkctx->enckey_len = key_len; + get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, key_len << 2); + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; + ablkctx->key_ctx_hdr = + FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? 
+ CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : + CHCR_KEYCTX_CIPHER_KEY_SIZE_256, + CHCR_KEYCTX_NO_KEY, 1, + 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; + return 0; } static int chcr_sha_init(struct ahash_request *areq) @@ -1155,8 +1252,9 @@ static int chcr_sha_init(struct ahash_request *areq) int digestsize = crypto_ahash_digestsize(tfm); req_ctx->data_len = 0; - req_ctx->dummy_payload_ptr = NULL; - req_ctx->bfr_len = 0; + req_ctx->reqlen = 0; + req_ctx->reqbfr = req_ctx->bfr1; + req_ctx->skbfr = req_ctx->bfr2; req_ctx->skb = NULL; req_ctx->result = 0; copy_hash_init_values(req_ctx->partial_hash, digestsize); @@ -1204,29 +1302,1184 @@ static int chcr_hmac_cra_init(struct crypto_tfm *tfm) crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct chcr_ahash_req_ctx)); - hmacctx->desc = chcr_alloc_shash(digestsize); - if (IS_ERR(hmacctx->desc)) - return PTR_ERR(hmacctx->desc); + hmacctx->base_hash = chcr_alloc_shash(digestsize); + if (IS_ERR(hmacctx->base_hash)) + return PTR_ERR(hmacctx->base_hash); return chcr_device_init(crypto_tfm_ctx(tfm)); } -static void chcr_free_shash(struct shash_desc *desc) -{ - crypto_free_shash(desc->tfm); - kfree(desc); -} - static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) { struct chcr_context *ctx = crypto_tfm_ctx(tfm); struct hmac_ctx *hmacctx = HMAC_CTX(ctx); - if (hmacctx->desc) { - chcr_free_shash(hmacctx->desc); - hmacctx->desc = NULL; + if (hmacctx->base_hash) { + chcr_free_shash(hmacctx->base_hash); + hmacctx->base_hash = NULL; + } +} + +static int chcr_copy_assoc(struct aead_request *req, + struct chcr_aead_ctx *ctx) +{ + SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null); + + skcipher_request_set_tfm(skreq, ctx->null); + skcipher_request_set_callback(skreq, aead_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen, + NULL); + + return crypto_skcipher_encrypt(skreq); +} + +static unsigned char get_hmac(unsigned int authsize) +{ + switch (authsize) { + case ICV_8: + return CHCR_SCMD_HMAC_CTRL_PL1; + case ICV_10: + return CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + case ICV_12: + return CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + } + return CHCR_SCMD_HMAC_CTRL_NO_TRUNC; +} + + +static struct sk_buff *create_authenc_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src, *dst; + struct scatterlist src_sg[2], dst_sg[2]; + unsigned int frags = 0, transhdr_len; + unsigned int ivsize = crypto_aead_ivsize(tfm), dst_size = 0; + unsigned int kctx_len = 0; + unsigned short stop_offset = 0; + unsigned int assoclen = req->assoclen; + unsigned int authsize = crypto_aead_authsize(tfm); + int err = 0; + int null = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
GFP_KERNEL : + GFP_ATOMIC; + + if (aeadctx->enckey_len == 0 || (req->cryptlen == 0)) + goto err; + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); + dst = src; + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) + return ERR_PTR(err); + dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + } + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_NULL) { + null = 1; + assoclen = 0; + } + reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("AUTHENC:Invalid Destination sg entries\n"); + goto err; + } + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = (ntohl(KEY_CONTEXT_CTX_LEN_V(aeadctx->key_ctx_hdr)) << 4) + - sizeof(chcr_req->key_ctx); + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + if (!skb) + goto err; + + /* LLD is going to write the sge hdr. */ + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + /* Write WR */ + chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + stop_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; + + /* + * Input order is AAD,IV and Payload. where IV should be included as + * the part of authdata. All other fields should be filled according + * to the hardware spec + */ + chcr_req->sec_cpl.op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, + (ivsize ? (assoclen + 1) : 0)); + chcr_req->sec_cpl.pldlen = htonl(assoclen + ivsize + req->cryptlen); + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + assoclen ? 1 : 0, assoclen, + assoclen + ivsize + 1, + (stop_offset & 0x1F0) >> 4); + chcr_req->sec_cpl.cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT( + stop_offset & 0xF, + null ? 0 : assoclen + ivsize + 1, + stop_offset, stop_offset); + chcr_req->sec_cpl.seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? 1 : 0, + CHCR_SCMD_CIPHER_MODE_AES_CBC, + actx->auth_mode, aeadctx->hmac_ctrl, + ivsize >> 1); + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, 1, dst_size); + + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + if (op_type == CHCR_ENCRYPT_OP) + memcpy(chcr_req->key_ctx.key, aeadctx->key, + aeadctx->enckey_len); + else + memcpy(chcr_req->key_ctx.key, actx->dec_rrkey, + aeadctx->enckey_len); + + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) << + 4), actx->h_iopad, kctx_len - + (DIV_ROUND_UP(aeadctx->enckey_len, 16) << 4)); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? 
-authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + + if (assoclen) { + /* AAD buffer in */ + write_sg_to_skb(skb, &frags, req->src, assoclen); + + } + write_buffer_to_skb(skb, &frags, req->iv, ivsize); + write_sg_to_skb(skb, &frags, src, req->cryptlen); + create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + + return skb; +dstmap_fail: + /* ivmap_fail: */ + kfree_skb(skb); +err: + return ERR_PTR(-EINVAL); +} + +static void aes_gcm_empty_pld_pad(struct scatterlist *sg, + unsigned short offset) +{ + struct page *spage; + unsigned char *addr; + + spage = sg_page(sg); + get_page(spage); /* so that it is not freed by NIC */ +#ifdef KMAP_ATOMIC_ARGS + addr = kmap_atomic(spage, KM_SOFTIRQ0); +#else + addr = kmap_atomic(spage); +#endif + memset(addr + sg->offset, 0, offset + 1); + + kunmap_atomic(addr); +} + +static int set_msg_len(u8 *block, unsigned int msglen, int csize) +{ + __be32 data; + + memset(block, 0, csize); + block += csize; + + if (csize >= 4) + csize = 4; + else if (msglen > (unsigned int)(1 << (8 * csize))) + return -EOVERFLOW; + + data = cpu_to_be32(msglen); + memcpy(block - csize, (u8 *)&data + 4 - csize, csize); + + return 0; +} + +static void generate_b0(struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned short op_type) +{ + unsigned int l, lp, m; + int rc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + u8 *b0 = reqctx->scratch_pad; + + m = crypto_aead_authsize(aead); + + memcpy(b0, reqctx->iv, 16); + + lp = b0[0]; + l = lp + 1; + + /* set m, bits 3-5 */ + *b0 |= (8 * ((m - 2) / 2)); + + /* set adata, bit 6, if associated data is used */ + if (req->assoclen) + *b0 |= 64; + rc = set_msg_len(b0 + 16 - l, + (op_type == CHCR_DECRYPT_OP) ? + req->cryptlen - m : req->cryptlen, l); +} + +static inline int crypto_ccm_check_iv(const u8 *iv) +{ + /* 2 <= L <= 8, so 1 <= L' <= 7. */ + if (iv[0] < 1 || iv[0] > 7) + return -EINVAL; + + return 0; +} + +static int ccm_format_packet(struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned int sub_type, + unsigned short op_type) +{ + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + int rc = 0; + + if (req->assoclen > T5_MAX_AAD_SIZE) { + pr_err("CCM: Unsupported AAD data. 
It should be < %d\n", + T5_MAX_AAD_SIZE); + return -EINVAL; + } + if (sub_type == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) { + reqctx->iv[0] = 3; + memcpy(reqctx->iv + 1, &aeadctx->salt[0], 3); + memcpy(reqctx->iv + 4, req->iv, 8); + memset(reqctx->iv + 12, 0, 4); + *((unsigned short *)(reqctx->scratch_pad + 16)) = + htons(req->assoclen - 8); + } else { + memcpy(reqctx->iv, req->iv, 16); + *((unsigned short *)(reqctx->scratch_pad + 16)) = + htons(req->assoclen); + } + generate_b0(req, aeadctx, op_type); + /* zero the ctr value */ + memset(reqctx->iv + 15 - reqctx->iv[0], 0, reqctx->iv[0] + 1); + return rc; +} + +static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, + unsigned int dst_size, + struct aead_request *req, + unsigned short op_type, + struct chcr_context *chcrctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned int ivsize = AES_BLOCK_SIZE; + unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; + unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; + unsigned int c_id = chcrctx->dev->tx_channel_id; + unsigned int ccm_xtra; + unsigned char tag_offset = 0, auth_offset = 0; + unsigned char hmac_ctrl = get_hmac(crypto_aead_authsize(tfm)); + unsigned int assoclen; + + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) + assoclen = req->assoclen - 8; + else + assoclen = req->assoclen; + ccm_xtra = CCM_B0_SIZE + + ((assoclen) ? CCM_AAD_FIELD_SIZE : 0); + + auth_offset = req->cryptlen ? + (assoclen + ivsize + 1 + ccm_xtra) : 0; + if (op_type == CHCR_DECRYPT_OP) { + if (crypto_aead_authsize(tfm) != req->cryptlen) + tag_offset = crypto_aead_authsize(tfm); + else + auth_offset = 0; + } + + + sec_cpl->op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR(c_id, + 2, (ivsize ? (assoclen + 1) : 0) + + ccm_xtra); + sec_cpl->pldlen = + htonl(assoclen + ivsize + req->cryptlen + ccm_xtra); + /* For CCM there wil be b0 always. So AAD start will be 1 always */ + sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + 1, assoclen + ccm_xtra, assoclen + + ivsize + 1 + ccm_xtra, 0); + + sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, + auth_offset, tag_offset, + (op_type == CHCR_ENCRYPT_OP) ? 0 : + crypto_aead_authsize(tfm)); + sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? 0 : 1, + cipher_mode, mac_mode, hmac_ctrl, + ivsize >> 1); + + sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, 0, + 1, dst_size); +} + +int aead_ccm_validate_input(unsigned short op_type, + struct aead_request *req, + struct chcr_aead_ctx *aeadctx, + unsigned int sub_type) +{ + if (sub_type != CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) { + if (crypto_ccm_check_iv(req->iv)) { + pr_err("CCM: IV check fails\n"); + return -EINVAL; + } + } else { + if (req->assoclen != 16 && req->assoclen != 20) { + pr_err("RFC4309: Invalid AAD length %d\n", + req->assoclen); + return -EINVAL; + } + } + if (aeadctx->enckey_len == 0) { + pr_err("CCM: Encryption key not set\n"); + return -EINVAL; + } + return 0; +} + +unsigned int fill_aead_req_fields(struct sk_buff *skb, + struct aead_request *req, + struct scatterlist *src, + unsigned int ivsize, + struct chcr_aead_ctx *aeadctx) +{ + unsigned int frags = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + /* b0 and aad length(if available) */ + + write_buffer_to_skb(skb, &frags, reqctx->scratch_pad, CCM_B0_SIZE + + (req->assoclen ? 
CCM_AAD_FIELD_SIZE : 0)); + if (req->assoclen) { + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309) + write_sg_to_skb(skb, &frags, req->src, + req->assoclen - 8); + else + write_sg_to_skb(skb, &frags, req->src, req->assoclen); + } + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + if (req->cryptlen) + write_sg_to_skb(skb, &frags, src, req->cryptlen); + + return frags; +} + +static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src, *dst; + struct scatterlist src_sg[2], dst_sg[2]; + unsigned int frags = 0, transhdr_len, ivsize = AES_BLOCK_SIZE; + unsigned int dst_size = 0, kctx_len; + unsigned int sub_type; + unsigned int authsize = crypto_aead_authsize(tfm); + int err = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + sub_type = get_aead_subtype(tfm); + src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); + dst = src; + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) { + pr_err("AAD copy to destination buffer fails\n"); + return ERR_PTR(err); + } + dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + } + reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("CCM:Invalid Destination sg entries\n"); + goto err; + } + + + if (aead_ccm_validate_input(op_type, req, aeadctx, sub_type)) + goto err; + + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) * 2; + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + + if (!skb) + goto err; + + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + chcr_req = (struct chcr_wr *) __skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + fill_sec_cpl_for_aead(&chcr_req->sec_cpl, dst_size, req, op_type, ctx); + + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len); + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) * + 16), aeadctx->key, aeadctx->enckey_len); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + if (ccm_format_packet(req, aeadctx, sub_type, op_type)) + goto dstmap_fail; + + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? 
-authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + frags = fill_aead_req_fields(skb, req, src, ivsize, aeadctx); + create_wreq(ctx, chcr_req, req, skb, kctx_len, 0, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + return skb; +dstmap_fail: + kfree_skb(skb); + skb = NULL; +err: + return ERR_PTR(-EINVAL); +} + +static struct sk_buff *create_gcm_wr(struct aead_request *req, + unsigned short qid, + int size, + unsigned short op_type) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct sk_buff *skb = NULL; + struct chcr_wr *chcr_req; + struct cpl_rx_phys_dsgl *phys_cpl; + struct phys_sge_parm sg_param; + struct scatterlist *src, *dst; + struct scatterlist src_sg[2], dst_sg[2]; + unsigned int frags = 0, transhdr_len; + unsigned int ivsize = AES_BLOCK_SIZE; + unsigned int dst_size = 0, kctx_len; + unsigned char tag_offset = 0; + unsigned int crypt_len = 0; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned char hmac_ctrl = get_hmac(authsize); + int err = 0; + gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : + GFP_ATOMIC; + + /* validate key size */ + if (aeadctx->enckey_len == 0) + goto err; + + if (op_type && req->cryptlen < crypto_aead_authsize(tfm)) + goto err; + + if (sg_nents_for_len(req->src, req->assoclen + req->cryptlen) < 0) + goto err; + + src = scatterwalk_ffwd(src_sg, req->src, req->assoclen); + dst = src; + if (req->src != req->dst) { + err = chcr_copy_assoc(req, aeadctx); + if (err) + return ERR_PTR(err); + dst = scatterwalk_ffwd(dst_sg, req->dst, req->assoclen); + } + + if (!req->cryptlen) + /* null-payload is not supported in the hardware. + * software is sending block size + */ + crypt_len = AES_BLOCK_SIZE; + else + crypt_len = req->cryptlen; + reqctx->dst_nents = sg_nents_for_len(dst, req->cryptlen + + (op_type ? -authsize : authsize)); + if (reqctx->dst_nents <= 0) { + pr_err("GCM:Invalid Destination sg entries\n"); + goto err; + } + + + dst_size = get_space_for_phys_dsgl(reqctx->dst_nents); + kctx_len = ((DIV_ROUND_UP(aeadctx->enckey_len, 16)) << 4) + + AEAD_H_SIZE; + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dst_size); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), flags); + if (!skb) + goto err; + + /* NIC driver is going to write the sge hdr. */ + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + + chcr_req = (struct chcr_wr *)__skb_put(skb, transhdr_len); + memset(chcr_req, 0, transhdr_len); + + if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) + req->assoclen -= 8; + + tag_offset = (op_type == CHCR_ENCRYPT_OP) ? 0 : authsize; + chcr_req->sec_cpl.op_ivinsrtofst = FILL_SEC_CPL_OP_IVINSR( + ctx->dev->tx_channel_id, 2, (ivsize ? + (req->assoclen + 1) : 0)); + chcr_req->sec_cpl.pldlen = htonl(req->assoclen + ivsize + crypt_len); + chcr_req->sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI( + req->assoclen ? 
1 : 0, req->assoclen, + req->assoclen + ivsize + 1, 0); + if (req->cryptlen) { + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, req->assoclen + ivsize + 1, + tag_offset, tag_offset); + chcr_req->sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(op_type, (op_type == + CHCR_ENCRYPT_OP) ? 1 : 0, + CHCR_SCMD_CIPHER_MODE_AES_GCM, + CHCR_SCMD_AUTH_MODE_GHASH, hmac_ctrl, + ivsize >> 1); + } else { + chcr_req->sec_cpl.cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 0, 0, 0); + chcr_req->sec_cpl.seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(op_type, + (op_type == CHCR_ENCRYPT_OP) ? + 1 : 0, CHCR_SCMD_CIPHER_MODE_AES_CBC, + 0, 0, ivsize >> 1); + } + chcr_req->sec_cpl.ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1, + 0, 1, dst_size); + chcr_req->key_ctx.ctx_hdr = aeadctx->key_ctx_hdr; + memcpy(chcr_req->key_ctx.key, aeadctx->key, aeadctx->enckey_len); + memcpy(chcr_req->key_ctx.key + (DIV_ROUND_UP(aeadctx->enckey_len, 16) * + 16), GCM_CTX(aeadctx)->ghash_h, AEAD_H_SIZE); + + /* prepare a 16 byte iv */ + /* S A L T | IV | 0x00000001 */ + if (get_aead_subtype(tfm) == + CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) { + memcpy(reqctx->iv, aeadctx->salt, 4); + memcpy(reqctx->iv + 4, req->iv, 8); + } else { + memcpy(reqctx->iv, req->iv, 12); + } + *((unsigned int *)(reqctx->iv + 12)) = htonl(0x01); + + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)(chcr_req + 1) + kctx_len); + sg_param.nents = reqctx->dst_nents; + sg_param.obsize = req->cryptlen + (op_type ? -authsize : authsize); + sg_param.qid = qid; + sg_param.align = 0; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, dst, + &sg_param)) + goto dstmap_fail; + + skb_set_transport_header(skb, transhdr_len); + + write_sg_to_skb(skb, &frags, req->src, req->assoclen); + + write_buffer_to_skb(skb, &frags, reqctx->iv, ivsize); + + if (req->cryptlen) { + write_sg_to_skb(skb, &frags, src, req->cryptlen); + } else { + aes_gcm_empty_pld_pad(req->dst, authsize - 1); + write_sg_to_skb(skb, &frags, dst, crypt_len); + } + + create_wreq(ctx, chcr_req, req, skb, kctx_len, size, 1, + sizeof(struct cpl_rx_phys_dsgl) + dst_size); + reqctx->skb = skb; + skb_get(skb); + return skb; + +dstmap_fail: + /* ivmap_fail: */ + kfree_skb(skb); + skb = NULL; +err: + return skb; +} + + + +static int chcr_aead_cra_init(struct crypto_aead *tfm) +{ + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + + crypto_aead_set_reqsize(tfm, sizeof(struct chcr_aead_reqctx)); + aeadctx->null = crypto_get_default_null_skcipher(); + if (IS_ERR(aeadctx->null)) + return PTR_ERR(aeadctx->null); + return chcr_device_init(ctx); +} + +static void chcr_aead_cra_exit(struct crypto_aead *tfm) +{ + crypto_put_default_null_skcipher(); +} + +static int chcr_authenc_null_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NOP; + aeadctx->mayverify = VERIFY_HW; + return 0; +} +static int chcr_authenc_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + u32 maxauth = crypto_aead_maxauthsize(tfm); + + /*SHA1 authsize in ipsec is 12 instead of 10 i.e maxauthsize / 2 is not + * true for sha1. 
authsize == 12 condition should be before + * authsize == (maxauth >> 1) + */ + if (authsize == ICV_4) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_6) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_10) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_12) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == ICV_14) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == (maxauth >> 1)) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + } else if (authsize == maxauth) { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + } else { + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_SW; + } + return 0; +} + + +static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_4: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_14: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_13: + case ICV_15: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_SW; + break; + default: + + crypto_tfm_set_flags((struct crypto_tfm *) tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + default: + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_ccm_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + + switch (authsize) { + case ICV_4: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL1; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_6: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_8: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_10: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_12: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_14: + aeadctx->hmac_ctrl = CHCR_SCMD_HMAC_CTRL_PL3; + aeadctx->mayverify = VERIFY_HW; + break; + case ICV_16: + aeadctx->hmac_ctrl = 
CHCR_SCMD_HMAC_CTRL_NO_TRUNC; + aeadctx->mayverify = VERIFY_HW; + break; + default: + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + return 0; +} + +static int chcr_aead_ccm_setkey(struct crypto_aead *aead, + const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + unsigned char ck_size, mk_size; + int key_ctx_size = 0; + + memcpy(aeadctx->key, key, keylen); + aeadctx->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) * 2; + if (keylen == AES_KEYSIZE_128) { + mk_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + return -EINVAL; + } + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, mk_size, 0, 0, + key_ctx_size >> 4); + return 0; +} + +static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + + if (keylen < 3) { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + return -EINVAL; + } + keylen -= 3; + memcpy(aeadctx->salt, key + keylen, 3); + return chcr_aead_ccm_setkey(aead, key, keylen); +} + +static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(aead); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx); + struct blkcipher_desc h_desc; + struct scatterlist src[1]; + unsigned int ck_size; + int ret = 0, key_ctx_size = 0; + + if (get_aead_subtype(aead) == + CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106) { + keylen -= 4; /* nonce/salt is present in the last 4 bytes */ + memcpy(aeadctx->salt, key + keylen, 4); + } + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)aead, + CRYPTO_TFM_RES_BAD_KEY_LEN); + aeadctx->enckey_len = 0; + pr_err("GCM: Invalid key length %d", keylen); + ret = -EINVAL; + goto out; + } + + memcpy(aeadctx->key, key, keylen); + aeadctx->enckey_len = keylen; + key_ctx_size = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keylen, 16)) << 4) + + AEAD_H_SIZE; + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, + CHCR_KEYCTX_MAC_KEY_SIZE_128, + 0, 0, + key_ctx_size >> 4); + /* Calculate the H = CIPH(K, 0 repeated 16 times) using sync aes + * blkcipher It will go on key context + */ + h_desc.tfm = crypto_alloc_blkcipher("cbc(aes-generic)", 0, 0); + if (IS_ERR(h_desc.tfm)) { + aeadctx->enckey_len = 0; + ret = -ENOMEM; + goto out; + } + h_desc.flags = 0; + ret = crypto_blkcipher_setkey(h_desc.tfm, key, keylen); + if (ret) { + aeadctx->enckey_len = 0; + goto out1; + } + memset(gctx->ghash_h, 0, AEAD_H_SIZE); + sg_init_one(&src[0], gctx->ghash_h, AEAD_H_SIZE); + ret = crypto_blkcipher_encrypt(&h_desc, &src[0], &src[0], 
AEAD_H_SIZE); + +out1: + crypto_free_blkcipher(h_desc.tfm); +out: + return ret; +} + +static int chcr_authenc_setkey(struct crypto_aead *authenc, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(authenc); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + /* it contains auth and cipher key both*/ + struct crypto_authenc_keys keys; + unsigned int bs; + unsigned int max_authsize = crypto_aead_alg(authenc)->maxauthsize; + int err = 0, i, key_ctx_len = 0; + unsigned char ck_size = 0; + unsigned char pad[CHCR_HASH_MAX_BLOCK_SIZE_128] = { 0 }; + struct crypto_shash *base_hash = NULL; + struct algo_param param; + int align; + u8 *o_ptr = NULL; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) { + crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN); + goto out; + } + + if (get_alg_config(¶m, max_authsize)) { + pr_err("chcr : Unsupported digest size\n"); + goto out; + } + if (keys.enckeylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keys.enckeylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keys.enckeylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("chcr : Unsupported cipher key\n"); + goto out; + } + + /* Copy only encryption key. We use authkey to generate h(ipad) and + * h(opad) so authkey is not needed again. authkeylen size have the + * size of the hash digest size. + */ + memcpy(aeadctx->key, keys.enckey, keys.enckeylen); + aeadctx->enckey_len = keys.enckeylen; + get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key, + aeadctx->enckey_len << 3); + + base_hash = chcr_alloc_shash(max_authsize); + if (IS_ERR(base_hash)) { + pr_err("chcr : Base driver cannot be loaded\n"); + goto out; } + { + SHASH_DESC_ON_STACK(shash, base_hash); + shash->tfm = base_hash; + shash->flags = crypto_shash_get_flags(base_hash); + bs = crypto_shash_blocksize(base_hash); + align = KEYCTX_ALIGN_PAD(max_authsize); + o_ptr = actx->h_iopad + param.result_size + align; + + if (keys.authkeylen > bs) { + err = crypto_shash_digest(shash, keys.authkey, + keys.authkeylen, + o_ptr); + if (err) { + pr_err("chcr : Base driver cannot be loaded\n"); + goto out; + } + keys.authkeylen = max_authsize; + } else + memcpy(o_ptr, keys.authkey, keys.authkeylen); + + /* Compute the ipad-digest*/ + memset(pad + keys.authkeylen, 0, bs - keys.authkeylen); + memcpy(pad, o_ptr, keys.authkeylen); + for (i = 0; i < bs >> 2; i++) + *((unsigned int *)pad + i) ^= IPAD_DATA; + + if (chcr_compute_partial_hash(shash, pad, actx->h_iopad, + max_authsize)) + goto out; + /* Compute the opad-digest */ + memset(pad + keys.authkeylen, 0, bs - keys.authkeylen); + memcpy(pad, o_ptr, keys.authkeylen); + for (i = 0; i < bs >> 2; i++) + *((unsigned int *)pad + i) ^= OPAD_DATA; + + if (chcr_compute_partial_hash(shash, pad, o_ptr, max_authsize)) + goto out; + + /* convert the ipad and opad digest to network order */ + chcr_change_order(actx->h_iopad, param.result_size); + chcr_change_order(o_ptr, param.result_size); + key_ctx_len = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4) + + (param.result_size + align) * 2; + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, param.mk_size, + 0, 1, key_ctx_len >> 4); + actx->auth_mode = param.auth_mode; + chcr_free_shash(base_hash); + + return 0; + } +out: + aeadctx->enckey_len = 0; + if (base_hash) + chcr_free_shash(base_hash); + return -EINVAL; } +static int 
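The setkey/setauthsize helpers above (and the encrypt/decrypt entry points further below) are only reached through the generic kernel AEAD API. A minimal consumer sketch follows for orientation; the "gcm(aes)" name, key and buffer handling are illustrative assumptions, and whichever registered provider has the highest priority (e.g. gcm-aes-chcr once this driver is loaded) actually services the request:

    #include <crypto/aead.h>
    #include <linux/completion.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    struct demo_wait {
        struct completion done;
        int err;
    };

    static void demo_aead_done(struct crypto_async_request *areq, int err)
    {
        struct demo_wait *wait = areq->data;

        if (err == -EINPROGRESS)
            return;        /* request was backlogged, final completion follows */
        wait->err = err;
        complete(&wait->done);
    }

    /* Encrypt 'len' bytes in place with no AAD; 'buf' must have room for the
     * 16-byte tag appended after the ciphertext, 'iv' is the 12-byte GCM nonce. */
    static int demo_gcm_encrypt(const u8 *key, unsigned int keylen,
                                u8 *buf, unsigned int len, u8 *iv)
    {
        struct crypto_aead *tfm;
        struct aead_request *req;
        struct scatterlist sg;
        struct demo_wait wait;
        int ret;

        tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
        if (IS_ERR(tfm))
            return PTR_ERR(tfm);

        ret = crypto_aead_setkey(tfm, key, keylen);   /* driver's .setkey, e.g. chcr_gcm_setkey() */
        if (!ret)
            ret = crypto_aead_setauthsize(tfm, 16);   /* e.g. chcr_gcm_setauthsize() */
        if (ret)
            goto out_tfm;

        req = aead_request_alloc(tfm, GFP_KERNEL);
        if (!req) {
            ret = -ENOMEM;
            goto out_tfm;
        }

        init_completion(&wait.done);
        sg_init_one(&sg, buf, len + 16);
        aead_request_set_ad(req, 0);
        aead_request_set_crypt(req, &sg, &sg, len, iv);
        aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                  demo_aead_done, &wait);

        ret = crypto_aead_encrypt(req);               /* e.g. chcr_aead_encrypt() */
        if (ret == -EINPROGRESS || ret == -EBUSY) {
            wait_for_completion(&wait.done);
            ret = wait.err;
        }

        aead_request_free(req);
    out_tfm:
        crypto_free_aead(tfm);
        return ret;
    }

For this driver, chcr_aead_op() turns the request into a firmware work request and returns -EINPROGRESS, so the completion wait above is the normal path rather than the exception.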
chcr_aead_digest_null_setkey(struct crypto_aead *authenc, + const u8 *key, unsigned int keylen) +{ + struct chcr_context *ctx = crypto_aead_ctx(authenc); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); + struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); + struct crypto_authenc_keys keys; + + /* it contains auth and cipher key both*/ + int key_ctx_len = 0; + unsigned char ck_size = 0; + + if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) { + crypto_aead_set_flags(authenc, CRYPTO_TFM_RES_BAD_KEY_LEN); + goto out; + } + if (keys.enckeylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keys.enckeylen == AES_KEYSIZE_192) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keys.enckeylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + pr_err("chcr : Unsupported cipher key\n"); + goto out; + } + memcpy(aeadctx->key, keys.enckey, keys.enckeylen); + aeadctx->enckey_len = keys.enckeylen; + get_aes_decrypt_key(actx->dec_rrkey, aeadctx->key, + aeadctx->enckey_len << 3); + key_ctx_len = sizeof(struct _key_ctx) + + ((DIV_ROUND_UP(keys.enckeylen, 16)) << 4); + + aeadctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, 0, + 0, key_ctx_len >> 4); + actx->auth_mode = CHCR_SCMD_AUTH_MODE_NOP; + return 0; +out: + aeadctx->enckey_len = 0; + return -EINVAL; +} +static int chcr_aead_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + + reqctx->verify = VERIFY_HW; + + switch (get_aead_subtype(tfm)) { + case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC: + case CRYPTO_ALG_SUB_TYPE_AEAD_NULL: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_authenc_wr); + case CRYPTO_ALG_SUB_TYPE_AEAD_CCM: + case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_aead_ccm_wr); + default: + return chcr_aead_op(req, CHCR_ENCRYPT_OP, 0, + create_gcm_wr); + } +} + +static int chcr_aead_decrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_aead_ctx *aeadctx = AEAD_CTX(crypto_aead_ctx(tfm)); + struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + int size; + + if (aeadctx->mayverify == VERIFY_SW) { + size = crypto_aead_maxauthsize(tfm); + reqctx->verify = VERIFY_SW; + } else { + size = 0; + reqctx->verify = VERIFY_HW; + } + + switch (get_aead_subtype(tfm)) { + case CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC: + case CRYPTO_ALG_SUB_TYPE_AEAD_NULL: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_authenc_wr); + case CRYPTO_ALG_SUB_TYPE_AEAD_CCM: + case CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_aead_ccm_wr); + default: + return chcr_aead_op(req, CHCR_DECRYPT_OP, size, + create_gcm_wr); + } +} + +static int chcr_aead_op(struct aead_request *req, + unsigned short op_type, + int size, + create_wr_t create_wr_fn) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct chcr_context *ctx = crypto_aead_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct sk_buff *skb; + + if (ctx && !ctx->dev) { + pr_err("chcr : %s : No crypto device.\n", __func__); + return -ENXIO; + } + if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id)) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + /* Form a WR from req */ + skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[ctx->tx_channel_id], size, + op_type); + + if (IS_ERR(skb) || skb == NULL) { + pr_err("chcr : %s : failed to form 
WR. No memory\n", __func__); + return PTR_ERR(skb); + } + + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} static struct chcr_alg_template driver_algs[] = { /* AES-CBC */ { @@ -1234,7 +2487,7 @@ static struct chcr_alg_template driver_algs[] = { .is_registered = 0, .alg.crypto = { .cra_name = "cbc(aes)", - .cra_driver_name = "cbc(aes-chcr)", + .cra_driver_name = "cbc-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_ASYNC, @@ -1261,7 +2514,7 @@ static struct chcr_alg_template driver_algs[] = { .is_registered = 0, .alg.crypto = { .cra_name = "xts(aes)", - .cra_driver_name = "xts(aes-chcr)", + .cra_driver_name = "xts-aes-chcr", .cra_priority = CHCR_CRA_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_ASYNC, @@ -1354,7 +2607,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA1_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha1)", - .cra_driver_name = "hmac(sha1-chcr)", + .cra_driver_name = "hmac-sha1-chcr", .cra_blocksize = SHA1_BLOCK_SIZE, } } @@ -1366,7 +2619,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA224_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha224)", - .cra_driver_name = "hmac(sha224-chcr)", + .cra_driver_name = "hmac-sha224-chcr", .cra_blocksize = SHA224_BLOCK_SIZE, } } @@ -1378,7 +2631,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA256_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha256)", - .cra_driver_name = "hmac(sha256-chcr)", + .cra_driver_name = "hmac-sha256-chcr", .cra_blocksize = SHA256_BLOCK_SIZE, } } @@ -1390,7 +2643,7 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA384_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha384)", - .cra_driver_name = "hmac(sha384-chcr)", + .cra_driver_name = "hmac-sha384-chcr", .cra_blocksize = SHA384_BLOCK_SIZE, } } @@ -1402,11 +2655,205 @@ static struct chcr_alg_template driver_algs[] = { .halg.digestsize = SHA512_DIGEST_SIZE, .halg.base = { .cra_name = "hmac(sha512)", - .cra_driver_name = "hmac(sha512-chcr)", + .cra_driver_name = "hmac-sha512-chcr", .cra_blocksize = SHA512_BLOCK_SIZE, } } }, + /* Add AEAD Algorithms */ + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_GCM, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_gcm_ctx), + }, + .ivsize = 12, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_gcm_setkey, + .setauthsize = chcr_gcm_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_gcm_ctx), + + }, + .ivsize = 8, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_gcm_setkey, + .setauthsize = chcr_4106_4309_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_CCM, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = GHASH_DIGEST_SIZE, + 
.setkey = chcr_aead_ccm_setkey, + .setauthsize = chcr_ccm_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "rfc4309(ccm(aes))", + .cra_driver_name = "rfc4309-ccm-aes-chcr", + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx), + + }, + .ivsize = 8, + .maxauthsize = GHASH_DIGEST_SIZE, + .setkey = chcr_aead_rfc4309_setkey, + .setauthsize = chcr_4106_4309_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha1-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha256-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha224-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha384-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = + "authenc-hmac-sha512-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + .setkey = chcr_authenc_setkey, + .setauthsize = chcr_authenc_setauthsize, + } + }, + { + .type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_SUB_TYPE_AEAD_NULL, + .is_registered = 0, + .alg.aead = { + .base = { + .cra_name = "authenc(digest_null,cbc(aes))", + .cra_driver_name = + "authenc-digest_null-cbc-aes-chcr", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct chcr_aead_ctx) + + 
sizeof(struct chcr_authenc_ctx), + + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = 0, + .setkey = chcr_aead_digest_null_setkey, + .setauthsize = chcr_authenc_null_setauthsize, + } + }, }; /* @@ -1424,6 +2871,11 @@ static int chcr_unregister_alg(void) crypto_unregister_alg( &driver_algs[i].alg.crypto); break; + case CRYPTO_ALG_TYPE_AEAD: + if (driver_algs[i].is_registered) + crypto_unregister_aead( + &driver_algs[i].alg.aead); + break; case CRYPTO_ALG_TYPE_AHASH: if (driver_algs[i].is_registered) crypto_unregister_ahash( @@ -1458,6 +2910,19 @@ static int chcr_register_alg(void) err = crypto_register_alg(&driver_algs[i].alg.crypto); name = driver_algs[i].alg.crypto.cra_driver_name; break; + case CRYPTO_ALG_TYPE_AEAD: + driver_algs[i].alg.aead.base.cra_priority = + CHCR_CRA_PRIORITY; + driver_algs[i].alg.aead.base.cra_flags = + CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; + driver_algs[i].alg.aead.encrypt = chcr_aead_encrypt; + driver_algs[i].alg.aead.decrypt = chcr_aead_decrypt; + driver_algs[i].alg.aead.init = chcr_aead_cra_init; + driver_algs[i].alg.aead.exit = chcr_aead_cra_exit; + driver_algs[i].alg.aead.base.cra_module = THIS_MODULE; + err = crypto_register_aead(&driver_algs[i].alg.aead); + name = driver_algs[i].alg.aead.base.cra_driver_name; + break; case CRYPTO_ALG_TYPE_AHASH: a_hash = &driver_algs[i].alg.hash; a_hash->update = chcr_ahash_update; diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h index 199b0bb..3c7c51f 100644 --- a/drivers/crypto/chelsio/chcr_algo.h +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -108,30 +108,24 @@ #define IPAD_DATA 0x36363636 #define OPAD_DATA 0x5c5c5c5c -#define TRANSHDR_SIZE(alignedkctx_len)\ - (sizeof(struct ulptx_idata) +\ - sizeof(struct ulp_txpkt) +\ - sizeof(struct fw_crypto_lookaside_wr) +\ - sizeof(struct cpl_tx_sec_pdu) +\ - (alignedkctx_len)) -#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \ - (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\ +#define TRANSHDR_SIZE(kctx_len)\ + (sizeof(struct chcr_wr) +\ + kctx_len) +#define CIPHER_TRANSHDR_SIZE(kctx_len, sge_pairs) \ + (TRANSHDR_SIZE((kctx_len)) + (sge_pairs) +\ sizeof(struct cpl_rx_phys_dsgl)) -#define HASH_TRANSHDR_SIZE(alignedkctx_len)\ - (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES) +#define HASH_TRANSHDR_SIZE(kctx_len)\ + (TRANSHDR_SIZE(kctx_len) + DUMMY_BYTES) -#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \ - sizeof(struct ulp_txpkt) + \ - sizeof(struct ulptx_idata)) -#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst) \ +#define FILL_SEC_CPL_OP_IVINSR(id, len, ofst) \ htonl( \ CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \ CPL_TX_SEC_PDU_RXCHID_V((id)) | \ CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \ CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \ CPL_TX_SEC_PDU_CPLLEN_V((len)) | \ - CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \ + CPL_TX_SEC_PDU_PLACEHOLDER_V(0) | \ CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst))) #define FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \ @@ -148,7 +142,7 @@ CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \ CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst))) -#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs) \ +#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size) \ htonl( \ SCMD_SEQ_NO_CTRL_V(0) | \ SCMD_STATUS_PRESENT_V(0) | \ @@ -159,7 +153,7 @@ SCMD_AUTH_MODE_V((amode)) | \ SCMD_HMAC_CTRL_V((opad)) | \ SCMD_IV_SIZE_V((size)) | \ - SCMD_NUM_IVS_V((nivs))) + SCMD_NUM_IVS_V(0)) #define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \ SCMD_ENB_DBGID_V(0) | \ @@ -264,13 +258,15 @@ 
enum { * where they indicate the size of the integrity check value (ICV) */ enum { - AES_CCM_ICV_4 = 4, - AES_CCM_ICV_6 = 6, - AES_CCM_ICV_8 = 8, - AES_CCM_ICV_10 = 10, - AES_CCM_ICV_12 = 12, - AES_CCM_ICV_14 = 14, - AES_CCM_ICV_16 = 16 + ICV_4 = 4, + ICV_6 = 6, + ICV_8 = 8, + ICV_10 = 10, + ICV_12 = 12, + ICV_13 = 13, + ICV_14 = 14, + ICV_15 = 15, + ICV_16 = 16 }; struct hash_op_params { @@ -394,7 +390,7 @@ static const u8 aes_sbox[256] = { 187, 22 }; -static u32 aes_ks_subword(const u32 w) +static inline u32 aes_ks_subword(const u32 w) { u8 bytes[4]; @@ -412,61 +408,4 @@ static u32 round_constant[11] = { 0x1B000000, 0x36000000, 0x6C000000 }; -/* dec_key - OUTPUT - Reverse round key - * key - INPUT - key - * keylength - INPUT - length of the key in number of bits - */ -static inline void get_aes_decrypt_key(unsigned char *dec_key, - const unsigned char *key, - unsigned int keylength) -{ - u32 temp; - u32 w_ring[MAX_NK]; - int i, j, k; - u8 nr, nk; - - switch (keylength) { - case AES_KEYLENGTH_128BIT: - nk = KEYLENGTH_4BYTES; - nr = NUMBER_OF_ROUNDS_10; - break; - - case AES_KEYLENGTH_192BIT: - nk = KEYLENGTH_6BYTES; - nr = NUMBER_OF_ROUNDS_12; - break; - case AES_KEYLENGTH_256BIT: - nk = KEYLENGTH_8BYTES; - nr = NUMBER_OF_ROUNDS_14; - break; - default: - return; - } - for (i = 0; i < nk; i++ ) - w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); - - i = 0; - temp = w_ring[nk - 1]; - while(i + nk < (nr + 1) * 4) { - if(!(i % nk)) { - /* RotWord(temp) */ - temp = (temp << 8) | (temp >> 24); - temp = aes_ks_subword(temp); - temp ^= round_constant[i / nk]; - } - else if (nk == 8 && (i % 4 == 0)) - temp = aes_ks_subword(temp); - w_ring[i % nk] ^= temp; - temp = w_ring[i % nk]; - i++; - } - i--; - for (k = 0, j = i % nk; k < nk; k++) { - *((u32 *)dec_key + k) = htonl(w_ring[j]); - j--; - if(j < 0) - j += nk; - } -} - #endif /* __CHCR_ALGO_H__ */ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c index 4d7f670..918da8e 100644 --- a/drivers/crypto/chelsio/chcr_core.c +++ b/drivers/crypto/chelsio/chcr_core.c @@ -110,14 +110,12 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev, if (ack_err_status) { if (CHK_MAC_ERR_BIT(ack_err_status) || CHK_PAD_ERR_BIT(ack_err_status)) - error_status = -EINVAL; + error_status = -EBADMSG; } /* call completion callback with failure status */ if (req) { - if (!chcr_handle_resp(req, input, error_status)) - req->complete(req, error_status); - else - return -EINVAL; + error_status = chcr_handle_resp(req, input, error_status); + req->complete(req, error_status); } else { pr_err("Incorrect request address from the firmware\n"); return -EFAULT; diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h index 2a5c671..c7088a4 100644 --- a/drivers/crypto/chelsio/chcr_core.h +++ b/drivers/crypto/chelsio/chcr_core.h @@ -52,13 +52,27 @@ #define MAC_ERROR_BIT 0 #define CHK_MAC_ERR_BIT(x) (((x) >> MAC_ERROR_BIT) & 1) +#define MAX_SALT 4 struct uld_ctx; +struct _key_ctx { + __be32 ctx_hdr; + u8 salt[MAX_SALT]; + __be64 reserverd; + unsigned char key[0]; +}; + +struct chcr_wr { + struct fw_crypto_lookaside_wr wreq; + struct ulp_txpkt ulptx; + struct ulptx_idata sc_imm; + struct cpl_tx_sec_pdu sec_cpl; + struct _key_ctx key_ctx; +}; + struct chcr_dev { - /* Request submited to h/w and waiting for response. 
*/ spinlock_t lock_chcr_dev; - struct crypto_queue pending_queue; struct uld_ctx *u_ctx; unsigned char tx_channel_id; }; diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index d7d7560..d5af7d6 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -36,6 +36,14 @@ #ifndef __CHCR_CRYPTO_H__ #define __CHCR_CRYPTO_H__ +#define GHASH_BLOCK_SIZE 16 +#define GHASH_DIGEST_SIZE 16 + +#define CCM_B0_SIZE 16 +#define CCM_AAD_FIELD_SIZE 2 +#define T5_MAX_AAD_SIZE 512 + + /* Define following if h/w is not dropping the AAD and IV data before * giving the processed data */ @@ -63,22 +71,36 @@ #define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0 #define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1 -#define CHCR_SCMD_CIPHER_MODE_NOP 0 -#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 -#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 -#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 +#define CHCR_SCMD_CIPHER_MODE_NOP 0 +#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 +#define CHCR_SCMD_CIPHER_MODE_AES_GCM 2 +#define CHCR_SCMD_CIPHER_MODE_AES_CTR 3 +#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 +#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 +#define CHCR_SCMD_CIPHER_MODE_AES_CCM 7 #define CHCR_SCMD_AUTH_MODE_NOP 0 #define CHCR_SCMD_AUTH_MODE_SHA1 1 #define CHCR_SCMD_AUTH_MODE_SHA224 2 #define CHCR_SCMD_AUTH_MODE_SHA256 3 +#define CHCR_SCMD_AUTH_MODE_GHASH 4 #define CHCR_SCMD_AUTH_MODE_SHA512_224 5 #define CHCR_SCMD_AUTH_MODE_SHA512_256 6 #define CHCR_SCMD_AUTH_MODE_SHA512_384 7 #define CHCR_SCMD_AUTH_MODE_SHA512_512 8 +#define CHCR_SCMD_AUTH_MODE_CBCMAC 9 +#define CHCR_SCMD_AUTH_MODE_CMAC 10 #define CHCR_SCMD_HMAC_CTRL_NOP 0 #define CHCR_SCMD_HMAC_CTRL_NO_TRUNC 1 +#define CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366 2 +#define CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT 3 +#define CHCR_SCMD_HMAC_CTRL_PL1 4 +#define CHCR_SCMD_HMAC_CTRL_PL2 5 +#define CHCR_SCMD_HMAC_CTRL_PL3 6 +#define CHCR_SCMD_HMAC_CTRL_DIV2 7 +#define VERIFY_HW 0 +#define VERIFY_SW 1 #define CHCR_SCMD_IVGEN_CTRL_HW 0 #define CHCR_SCMD_IVGEN_CTRL_SW 1 @@ -106,39 +128,74 @@ #define IV_IMMEDIATE 1 #define IV_DSGL 2 +#define AEAD_H_SIZE 16 + #define CRYPTO_ALG_SUB_TYPE_MASK 0x0f000000 #define CRYPTO_ALG_SUB_TYPE_HASH_HMAC 0x01000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 0x02000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_GCM 0x03000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_AUTHENC 0x04000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_CCM 0x05000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309 0x06000000 +#define CRYPTO_ALG_SUB_TYPE_AEAD_NULL 0x07000000 +#define CRYPTO_ALG_SUB_TYPE_CTR 0x08000000 #define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\ CRYPTO_ALG_SUB_TYPE_HASH_HMAC) -#define MAX_SALT 4 #define MAX_SCRATCH_PAD_SIZE 32 #define CHCR_HASH_MAX_BLOCK_SIZE_64 64 #define CHCR_HASH_MAX_BLOCK_SIZE_128 128 /* Aligned to 128 bit boundary */ -struct _key_ctx { - __be32 ctx_hdr; - u8 salt[MAX_SALT]; - __be64 reserverd; - unsigned char key[0]; -}; struct ablk_ctx { - u8 enc; - unsigned int processed_len; __be32 key_ctx_hdr; unsigned int enckey_len; - unsigned int dst_nents; - struct scatterlist iv_sg; u8 key[CHCR_AES_MAX_KEY_LEN]; - u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; unsigned char ciph_mode; + u8 rrkey[AES_MAX_KEY_SIZE]; +}; +struct chcr_aead_reqctx { + struct sk_buff *skb; + short int dst_nents; + u16 verify; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + unsigned char scratch_pad[MAX_SCRATCH_PAD_SIZE]; +}; + +struct chcr_gcm_ctx { + u8 ghash_h[AEAD_H_SIZE]; }; +struct chcr_authenc_ctx { + u8 dec_rrkey[AES_MAX_KEY_SIZE]; + u8 h_iopad[2 * CHCR_HASH_MAX_DIGEST_SIZE]; + unsigned char 
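As a concrete reading of the key-context sizing in chcr_gcm_setkey(), using the struct _key_ctx and AEAD_H_SIZE definitions above, the AES-128 GCM case works out as follows (the "16-byte units" interpretation of the >> 4 shift is inferred from the code, not stated in the patch):

    key_ctx_size = sizeof(struct _key_ctx)         /* 4 (ctx_hdr) + 4 (salt) + 8 (reserved) = 16 */
                 + (DIV_ROUND_UP(16, 16) << 4)      /* AES-128 key rounded up to 16 bytes    = 16 */
                 + AEAD_H_SIZE                      /* GHASH hash subkey H                   = 16 */
                 = 48 bytes

    FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_MAC_KEY_SIZE_128, 0, 0, key_ctx_size >> 4)
    therefore carries a length of 3, i.e. three 16-byte words: the header/salt block,
    the raw AES key, and H.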
auth_mode; +}; + +struct __aead_ctx { + struct chcr_gcm_ctx gcm[0]; + struct chcr_authenc_ctx authenc[0]; +}; + + + +struct chcr_aead_ctx { + __be32 key_ctx_hdr; + unsigned int enckey_len; + struct crypto_skcipher *null; + u8 salt[MAX_SALT]; + u8 key[CHCR_AES_MAX_KEY_LEN]; + u16 hmac_ctrl; + u16 mayverify; + struct __aead_ctx ctx[0]; +}; + + + struct hmac_ctx { - struct shash_desc *desc; + struct crypto_shash *base_hash; u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128]; u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128]; }; @@ -146,6 +203,7 @@ struct hmac_ctx { struct __crypto_ctx { struct hmac_ctx hmacctx[0]; struct ablk_ctx ablkctx[0]; + struct chcr_aead_ctx aeadctx[0]; }; struct chcr_context { @@ -156,18 +214,22 @@ struct chcr_context { struct chcr_ahash_req_ctx { u32 result; - char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128]; - u8 bfr_len; + u8 bfr1[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 bfr2[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 *reqbfr; + u8 *skbfr; + u8 reqlen; /* DMA the partial hash in it */ u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; u64 data_len; /* Data len till time */ - void *dummy_payload_ptr; /* SKB which is being sent to the hardware for processing */ struct sk_buff *skb; }; struct chcr_blkcipher_req_ctx { struct sk_buff *skb; + unsigned int dst_nents; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; }; struct chcr_alg_template { @@ -176,16 +238,19 @@ struct chcr_alg_template { union { struct crypto_alg crypto; struct ahash_alg hash; + struct aead_alg aead; } alg; }; struct chcr_req_ctx { union { struct ahash_request *ahash_req; + struct aead_request *aead_req; struct ablkcipher_request *ablk_req; } req; union { struct chcr_ahash_req_ctx *ahash_ctx; + struct chcr_aead_reqctx *reqctx; struct chcr_blkcipher_req_ctx *ablk_ctx; } ctx; }; @@ -195,9 +260,15 @@ struct sge_opaque_hdr { dma_addr_t addr[MAX_SKB_FRAGS + 1]; }; -typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req, - struct chcr_context *ctx, +typedef struct sk_buff *(*create_wr_t)(struct aead_request *req, unsigned short qid, + int size, unsigned short op_type); +static int chcr_aead_op(struct aead_request *req_base, + unsigned short op_type, + int size, + create_wr_t create_wr_fn); +static inline int get_aead_subtype(struct crypto_aead *aead); + #endif /* __CHCR_CRYPTO_H__ */ diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 37dadb2..6e7a5c7 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -375,10 +375,6 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; - dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); - if (!dma->iv_pool) - return -ENOMEM; - cesa->dma = dma; return 0; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index e423d33..a768da7 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -277,7 +277,7 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 -#define CESA_TDMA_IV 3 +#define CESA_TDMA_RESULT 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -393,7 +393,6 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; - struct dma_pool *iv_pool; }; /** @@ -839,7 +838,7 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, u32 size, u32 flags, gfp_t 
gfp_flags); struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index d19dc96..098871a 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -212,7 +212,8 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) struct mv_cesa_req *basereq; basereq = &creq->base; - memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + memcpy(ablkreq->info, basereq->chain.last->op->ctx.blkcipher.iv, + ivsize); } else { memcpy_fromio(ablkreq->info, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, @@ -373,8 +374,9 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, /* Add output data for IV */ ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); - ret = mv_cesa_dma_add_iv_op(&basereq->chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, - ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + ret = mv_cesa_dma_add_result_op(&basereq->chain, CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); if (ret) goto err_free_tdma; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 77712b3..317cf02 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -311,24 +311,40 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - creq->state[i] = readl_relaxed(engine->regs + CESA_IVDIG(i)); - if (creq->last_req) { + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_RESULT) { + __le32 *data = NULL; + /* - * Hardware's MD5 digest is in little endian format, but - * SHA in big endian format + * Result is already in the correct endianess when the SA is + * used */ - if (creq->algo_le) { - __le32 *result = (void *)ahashreq->result; + data = creq->base.chain.last->op->ctx.hash.hash; + for (i = 0; i < digsize / 4; i++) + creq->state[i] = cpu_to_le32(data[i]); - for (i = 0; i < digsize / 4; i++) - result[i] = cpu_to_le32(creq->state[i]); - } else { - __be32 *result = (void *)ahashreq->result; + memcpy(ahashreq->result, data, digsize); + } else { + for (i = 0; i < digsize / 4; i++) + creq->state[i] = readl_relaxed(engine->regs + + CESA_IVDIG(i)); + if (creq->last_req) { + /* + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ + if (creq->algo_le) { + __le32 *result = (void *)ahashreq->result; + + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_le32(creq->state[i]); + } else { + __be32 *result = (void *)ahashreq->result; - for (i = 0; i < digsize / 4; i++) - result[i] = cpu_to_be32(creq->state[i]); + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_be32(creq->state[i]); + } } } @@ -503,6 +519,12 @@ mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain, CESA_SA_DESC_CFG_LAST_FRAG, CESA_SA_DESC_CFG_FRAG_MSK); + ret = mv_cesa_dma_add_result_op(chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); + if (ret) + return ERR_PTR(-ENOMEM); return op; } @@ -563,6 +585,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; + u32 type; basereq->chain.first = NULL; basereq->chain.last = NULL; @@ -634,7 +657,15 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) goto err_free_tdma; } - if (op) { + /* + * If results are 
copied via DMA, this means that this + * request can be directly processed by the engine, + * without partial updates. So we can chain it at the + * DMA level with other requests. + */ + type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK; + + if (op && type != CESA_TDMA_RESULT) { /* Add dummy desc to wait for crypto operation end */ ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) @@ -647,8 +678,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; - basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | - CESA_TDMA_BREAK_CHAIN); + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; + + if (type != CESA_TDMA_RESULT) + basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; return 0; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 9fd7a5f..4416b88 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -69,9 +69,6 @@ void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); - else if (type == CESA_TDMA_IV) - dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, - le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -209,29 +206,37 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } -int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, u32 size, u32 flags, gfp_t gfp_flags) { - - struct mv_cesa_tdma_desc *tdma; - u8 *iv; - dma_addr_t dma_handle; + struct mv_cesa_tdma_desc *tdma, *op_desc; tdma = mv_cesa_dma_add_desc(chain, gfp_flags); if (IS_ERR(tdma)) return PTR_ERR(tdma); - iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); - if (!iv) - return -ENOMEM; + /* We re-use an existing op_desc object to retrieve the context + * and result instead of allocating a new one. + * There is at least one object of this type in a CESA crypto + * req, just pick the first one in the chain. 
+ */ + for (op_desc = chain->first; op_desc; op_desc = op_desc->next) { + u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK; + + if (type == CESA_TDMA_OP) + break; + } + + if (!op_desc) + return -EIO; tdma->byte_cnt = cpu_to_le32(size | BIT(31)); tdma->src = src; - tdma->dst = cpu_to_le32(dma_handle); - tdma->data = iv; + tdma->dst = op_desc->src; + tdma->op = op_desc->op; flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); - tdma->flags = flags | CESA_TDMA_IV; + tdma->flags = flags | CESA_TDMA_RESULT; return 0; } diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 104e9ce..451fa18 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -1073,7 +1073,7 @@ static int mv_probe(struct platform_device *pdev) if (!res) return -ENXIO; - cp = kzalloc(sizeof(*cp), GFP_KERNEL); + cp = devm_kzalloc(&pdev->dev, sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -1163,7 +1163,6 @@ err_irq: err_thread: kthread_stop(cp->queue_th); err: - kfree(cp); cpg = NULL; return ret; } @@ -1187,7 +1186,6 @@ static int mv_remove(struct platform_device *pdev) clk_put(cp->clk); } - kfree(cp); cpg = NULL; return 0; } diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 42f0f22..036057a 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -32,7 +32,6 @@ #include <linux/scatterlist.h> #include <linux/device.h> #include <linux/of.h> -#include <linux/types.h> #include <asm/hvcall.h> #include <asm/vio.h> diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 0c49956..1d9ecd3 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -390,7 +390,7 @@ static void sahara_decode_status(struct sahara_dev *dev, unsigned int status) if (status & SAHARA_STATUS_MODE_BATCH) dev_dbg(dev->device, " - Batch Mode.\n"); else if (status & SAHARA_STATUS_MODE_DEDICATED) - dev_dbg(dev->device, " - Decidated Mode.\n"); + dev_dbg(dev->device, " - Dedicated Mode.\n"); else if (status & SAHARA_STATUS_MODE_DEBUG) dev_dbg(dev->device, " - Debug Mode.\n"); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 0418a2f..0bba6a1 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -590,7 +590,7 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) if (v_lo & TALITOS_CCPSR_LO_MDTE) dev_err(dev, "master data transfer error\n"); if (v_lo & TALITOS_CCPSR_LO_SGDLZ) - dev_err(dev, is_sec1 ? "pointeur not complete error\n" + dev_err(dev, is_sec1 ? "pointer not complete error\n" : "s/g data length zero error\n"); if (v_lo & TALITOS_CCPSR_LO_FPZ) dev_err(dev, is_sec1 ? 
"parity error\n" diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index de6e241..55f7c39 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -10,10 +10,12 @@ endif quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) $(TARGET) > $(@) -$(src)/aesp8-ppc.S: $(src)/aesp8-ppc.pl - $(call cmd,perl) +targets += aesp8-ppc.S ghashp8-ppc.S + +$(obj)/aesp8-ppc.S: $(src)/aesp8-ppc.pl FORCE + $(call if_changed,perl) -$(src)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl - $(call cmd,perl) +$(obj)/ghashp8-ppc.S: $(src)/ghashp8-ppc.pl FORCE + $(call if_changed,perl) -.PRECIOUS: $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S +clean-files := aesp8-ppc.S ghashp8-ppc.S diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h new file mode 100644 index 0000000..e328b52 --- /dev/null +++ b/include/crypto/acompress.h @@ -0,0 +1,269 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li <weigang.li@intel.com> + * Giovanni Cabiddu <giovanni.cabiddu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ACOMP_H +#define _CRYPTO_ACOMP_H +#include <linux/crypto.h> + +#define CRYPTO_ACOMP_ALLOC_OUTPUT 0x00000001 + +/** + * struct acomp_req - asynchronous (de)compression request + * + * @base: Common attributes for asynchronous crypto requests + * @src: Source Data + * @dst: Destination data + * @slen: Size of the input buffer + * @dlen: Size of the output buffer and number of bytes produced + * @flags: Internal flags + * @__ctx: Start of private context data + */ +struct acomp_req { + struct crypto_async_request base; + struct scatterlist *src; + struct scatterlist *dst; + unsigned int slen; + unsigned int dlen; + u32 flags; + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +/** + * struct crypto_acomp - user-instantiated objects which encapsulate + * algorithms and core processing logic + * + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * @dst_free: Frees destination buffer if allocated inside the + * algorithm + * @reqsize: Context size for (de)compression requests + * @base: Common crypto API algorithm data structure + */ +struct crypto_acomp { + int (*compress)(struct acomp_req *req); + int (*decompress)(struct acomp_req *req); + void (*dst_free)(struct scatterlist *dst); + unsigned int reqsize; + struct crypto_tfm base; +}; + +/** + * struct acomp_alg - asynchronous compression algorithm + * + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * @dst_free: Frees destination buffer if allocated inside the algorithm + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. 
+ * + * @reqsize: Context size for (de)compression requests + * @base: Common crypto API algorithm data structure + */ +struct acomp_alg { + int (*compress)(struct acomp_req *req); + int (*decompress)(struct acomp_req *req); + void (*dst_free)(struct scatterlist *dst); + int (*init)(struct crypto_acomp *tfm); + void (*exit)(struct crypto_acomp *tfm); + unsigned int reqsize; + struct crypto_alg base; +}; + +/** + * DOC: Asynchronous Compression API + * + * The Asynchronous Compression API is used with the algorithms of type + * CRYPTO_ALG_TYPE_ACOMPRESS (listed as type "acomp" in /proc/crypto) + */ + +/** + * crypto_alloc_acomp() -- allocate ACOMPRESS tfm handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * compression algorithm e.g. "deflate" + * @type: specifies the type of the algorithm + * @mask: specifies the mask for the algorithm + * + * Allocate a handle for a compression algorithm. The returned struct + * crypto_acomp is the handle that is required for any subsequent + * API invocation for the compression operations. + * + * Return: allocated handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. + */ +struct crypto_acomp *crypto_alloc_acomp(const char *alg_name, u32 type, + u32 mask); + +static inline struct crypto_tfm *crypto_acomp_tfm(struct crypto_acomp *tfm) +{ + return &tfm->base; +} + +static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct acomp_alg, base); +} + +static inline struct crypto_acomp *__crypto_acomp_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_acomp, base); +} + +static inline struct acomp_alg *crypto_acomp_alg(struct crypto_acomp *tfm) +{ + return __crypto_acomp_alg(crypto_acomp_tfm(tfm)->__crt_alg); +} + +static inline unsigned int crypto_acomp_reqsize(struct crypto_acomp *tfm) +{ + return tfm->reqsize; +} + +static inline void acomp_request_set_tfm(struct acomp_req *req, + struct crypto_acomp *tfm) +{ + req->base.tfm = crypto_acomp_tfm(tfm); +} + +static inline struct crypto_acomp *crypto_acomp_reqtfm(struct acomp_req *req) +{ + return __crypto_acomp_tfm(req->base.tfm); +} + +/** + * crypto_free_acomp() -- free ACOMPRESS tfm handle + * + * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + */ +static inline void crypto_free_acomp(struct crypto_acomp *tfm) +{ + crypto_destroy_tfm(tfm, crypto_acomp_tfm(tfm)); +} + +static inline int crypto_has_acomp(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_ACOMPRESS; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +/** + * acomp_request_alloc() -- allocates asynchronous (de)compression request + * + * @tfm: ACOMPRESS tfm handle allocated with crypto_alloc_acomp() + * + * Return: allocated handle in case of success or NULL in case of an error + */ +struct acomp_req *acomp_request_alloc(struct crypto_acomp *tfm); + +/** + * acomp_request_free() -- zeroize and free asynchronous (de)compression + * request as well as the output buffer if allocated + * inside the algorithm + * + * @req: request to free + */ +void acomp_request_free(struct acomp_req *req); + +/** + * acomp_request_set_callback() -- Sets an asynchronous callback + * + * Callback will be called when an asynchronous operation on a given + * request is finished. 
+ * + * @req: request that the callback will be set for + * @flgs: specify for instance if the operation may backlog + * @cmpl: callback which will be called + * @data: private data used by the caller + */ +static inline void acomp_request_set_callback(struct acomp_req *req, + u32 flgs, + crypto_completion_t cmpl, + void *data) +{ + req->base.complete = cmpl; + req->base.data = data; + req->base.flags = flgs; +} + +/** + * acomp_request_set_params() -- Sets request parameters + * + * Sets parameters required by an acomp operation + * + * @req: asynchronous compress request + * @src: pointer to input buffer scatterlist + * @dst: pointer to output buffer scatterlist. If this is NULL, the + * acomp layer will allocate the output memory + * @slen: size of the input buffer + * @dlen: size of the output buffer. If dst is NULL, this can be used by + * the user to specify the maximum amount of memory to allocate + */ +static inline void acomp_request_set_params(struct acomp_req *req, + struct scatterlist *src, + struct scatterlist *dst, + unsigned int slen, + unsigned int dlen) +{ + req->src = src; + req->dst = dst; + req->slen = slen; + req->dlen = dlen; + + if (!req->dst) + req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT; +} + +/** + * crypto_acomp_compress() -- Invoke asynchronous compress operation + * + * Function invokes the asynchronous compress operation + * + * @req: asynchronous compress request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_acomp_compress(struct acomp_req *req) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + + return tfm->compress(req); +} + +/** + * crypto_acomp_decompress() -- Invoke asynchronous decompress operation + * + * Function invokes the asynchronous decompress operation + * + * @req: asynchronous decompress request + * + * Return: zero on success; error code in case of error + */ +static inline int crypto_acomp_decompress(struct acomp_req *req) +{ + struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); + + return tfm->decompress(req); +} + +#endif diff --git a/include/crypto/cbc.h b/include/crypto/cbc.h new file mode 100644 index 0000000..f5b8bfc --- /dev/null +++ b/include/crypto/cbc.h @@ -0,0 +1,146 @@ +/* + * CBC: Cipher Block Chaining mode + * + * Copyright (c) 2016 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version.
+ * + */ + +#ifndef _CRYPTO_CBC_H +#define _CRYPTO_CBC_H + +#include <crypto/internal/skcipher.h> +#include <linux/string.h> +#include <linux/types.h> + +static inline int crypto_cbc_encrypt_segment( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + crypto_xor(iv, src, bsize); + fn(tfm, iv, dst); + memcpy(iv, dst, bsize); + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + return nbytes; +} + +static inline int crypto_cbc_encrypt_inplace( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *iv = walk->iv; + + do { + crypto_xor(src, iv, bsize); + fn(tfm, src, src); + iv = src; + + src += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_encrypt_walk(struct skcipher_request *req, + void (*fn)(struct crypto_skcipher *, + const u8 *, u8 *)) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_walk walk; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + while (walk.nbytes) { + if (walk.src.virt.addr == walk.dst.virt.addr) + err = crypto_cbc_encrypt_inplace(&walk, tfm, fn); + else + err = crypto_cbc_encrypt_segment(&walk, tfm, fn); + err = skcipher_walk_done(&walk, err); + } + + return err; +} + +static inline int crypto_cbc_decrypt_segment( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + u8 *iv = walk->iv; + + do { + fn(tfm, src, dst); + crypto_xor(dst, iv, bsize); + iv = src; + + src += bsize; + dst += bsize; + } while ((nbytes -= bsize) >= bsize); + + memcpy(walk->iv, iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_decrypt_inplace( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + unsigned int bsize = crypto_skcipher_blocksize(tfm); + unsigned int nbytes = walk->nbytes; + u8 *src = walk->src.virt.addr; + u8 last_iv[bsize]; + + /* Start of the last block. 
*/ + src += nbytes - (nbytes & (bsize - 1)) - bsize; + memcpy(last_iv, src, bsize); + + for (;;) { + fn(tfm, src, src); + if ((nbytes -= bsize) < bsize) + break; + crypto_xor(src, src - bsize, bsize); + src -= bsize; + } + + crypto_xor(src, walk->iv, bsize); + memcpy(walk->iv, last_iv, bsize); + + return nbytes; +} + +static inline int crypto_cbc_decrypt_blocks( + struct skcipher_walk *walk, struct crypto_skcipher *tfm, + void (*fn)(struct crypto_skcipher *, const u8 *, u8 *)) +{ + if (walk->src.virt.addr == walk->dst.virt.addr) + return crypto_cbc_decrypt_inplace(walk, tfm, fn); + else + return crypto_cbc_decrypt_segment(walk, tfm, fn); +} + +#endif /* _CRYPTO_CBC_H */ diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index bc792d5..94418cb 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -12,10 +12,10 @@ #ifndef _CRYPTO_CRYPT_H #define _CRYPTO_CRYPT_H -#include <linux/crypto.h> #include <linux/kernel.h> #include <crypto/aead.h> #include <crypto/hash.h> +#include <crypto/skcipher.h> struct cryptd_ablkcipher { struct crypto_ablkcipher base; @@ -34,6 +34,17 @@ struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm); bool cryptd_ablkcipher_queued(struct cryptd_ablkcipher *tfm); void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm); +struct cryptd_skcipher { + struct crypto_skcipher base; +}; + +struct cryptd_skcipher *cryptd_alloc_skcipher(const char *alg_name, + u32 type, u32 mask); +struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm); +/* Must be called without moving CPUs. */ +bool cryptd_skcipher_queued(struct cryptd_skcipher *tfm); +void cryptd_free_skcipher(struct cryptd_skcipher *tfm); + struct cryptd_ahash { struct crypto_ahash base; }; diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 04eb5c7..1bf600f 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -43,8 +43,7 @@ * @prepare_hash_request: do some prepare if need before handle the current request * @unprepare_hash_request: undo any work done by prepare_hash_request() * @hash_one_request: do hash for current request - * @kworker: thread struct for request pump - * @kworker_task: pointer to task for request pump kworker thread + * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data * @cur_req: the current request which is on processing @@ -78,8 +77,7 @@ struct crypto_engine { int (*hash_one_request)(struct crypto_engine *engine, struct ahash_request *req); - struct kthread_worker kworker; - struct task_struct *kworker_task; + struct kthread_worker *kworker; struct kthread_work pump_requests; void *priv_data; diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index da2530e..592d47e 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -177,24 +177,23 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { - kfree(t); + kzfree(t); } -/* 64k table optimization, implemented for lle and bbe */ +/* 64k table optimization, implemented for bbe */ struct gf128mul_64k { struct gf128mul_4k *t[16]; }; -/* first initialize with the constant factor with which you - * want to multiply and then call gf128_64k_lle with the other - * factor in the first argument, the table in the second and a - * scratch register in the third. Afterwards *a = *r. 
*/ -struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); +/* First initialize with the constant factor with which you + * want to multiply and then call gf128mul_64k_bbe with the other + * factor in the first argument, and the table in the second. + * Afterwards, the result is stored in *a. + */ struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); -void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); #endif /* _CRYPTO_GF128MUL_H */ diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h new file mode 100644 index 0000000..1de2b5a --- /dev/null +++ b/include/crypto/internal/acompress.h @@ -0,0 +1,81 @@ +/* + * Asynchronous Compression operations + * + * Copyright (c) 2016, Intel Corporation + * Authors: Weigang Li <weigang.li@intel.com> + * Giovanni Cabiddu <giovanni.cabiddu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _CRYPTO_ACOMP_INT_H +#define _CRYPTO_ACOMP_INT_H +#include <crypto/acompress.h> + +/* + * Transform internal helpers. + */ +static inline void *acomp_request_ctx(struct acomp_req *req) +{ + return req->__ctx; +} + +static inline void *acomp_tfm_ctx(struct crypto_acomp *tfm) +{ + return tfm->base.__crt_ctx; +} + +static inline void acomp_request_complete(struct acomp_req *req, + int err) +{ + req->base.complete(&req->base, err); +} + +static inline const char *acomp_alg_name(struct crypto_acomp *tfm) +{ + return crypto_acomp_tfm(tfm)->__crt_alg->cra_name; +} + +static inline struct acomp_req *__acomp_request_alloc(struct crypto_acomp *tfm) +{ + struct acomp_req *req; + + req = kzalloc(sizeof(*req) + crypto_acomp_reqsize(tfm), GFP_KERNEL); + if (likely(req)) + acomp_request_set_tfm(req, tfm); + return req; +} + +static inline void __acomp_request_free(struct acomp_req *req) +{ + kzfree(req); +} + +/** + * crypto_register_acomp() -- Register asynchronous compression algorithm + * + * Function registers an implementation of an asynchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_acomp(struct acomp_alg *alg); + +/** + * crypto_unregister_acomp() -- Unregister asynchronous compression algorithm + * + * Function unregisters an implementation of an asynchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_unregister_acomp(struct acomp_alg *alg); + +#endif diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h new file mode 100644 index 0000000..3fda3c5 --- /dev/null +++ b/include/crypto/internal/scompress.h @@ -0,0 +1,136 @@ +/* + * Synchronous Compression operations + * + * Copyright 2015 LG Electronics Inc. + * Copyright (c) 2016, Intel Corporation + * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ +#ifndef _CRYPTO_SCOMP_INT_H +#define _CRYPTO_SCOMP_INT_H +#include <linux/crypto.h> + +#define SCOMP_SCRATCH_SIZE 131072 + +struct crypto_scomp { + struct crypto_tfm base; +}; + +/** + * struct scomp_alg - synchronous compression algorithm + * + * @alloc_ctx: Function allocates algorithm specific context + * @free_ctx: Function frees context allocated with alloc_ctx + * @compress: Function performs a compress operation + * @decompress: Function performs a de-compress operation + * @init: Initialize the cryptographic transformation object. + * This function is used to initialize the cryptographic + * transformation object. This function is called only once at + * the instantiation time, right after the transformation context + * was allocated. In case the cryptographic hardware has some + * special requirements which need to be handled by software, this + * function shall check for the precise requirement of the + * transformation and put any software fallbacks in place. + * @exit: Deinitialize the cryptographic transformation object. This is a + * counterpart to @init, used to remove various changes set in + * @init. + * @base: Common crypto API algorithm data structure + */ +struct scomp_alg { + void *(*alloc_ctx)(struct crypto_scomp *tfm); + void (*free_ctx)(struct crypto_scomp *tfm, void *ctx); + int (*compress)(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx); + int (*decompress)(struct crypto_scomp *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen, + void *ctx); + struct crypto_alg base; +}; + +static inline struct scomp_alg *__crypto_scomp_alg(struct crypto_alg *alg) +{ + return container_of(alg, struct scomp_alg, base); +} + +static inline struct crypto_scomp *__crypto_scomp_tfm(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_scomp, base); +} + +static inline struct crypto_tfm *crypto_scomp_tfm(struct crypto_scomp *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_scomp(struct crypto_scomp *tfm) +{ + crypto_destroy_tfm(tfm, crypto_scomp_tfm(tfm)); +} + +static inline struct scomp_alg *crypto_scomp_alg(struct crypto_scomp *tfm) +{ + return __crypto_scomp_alg(crypto_scomp_tfm(tfm)->__crt_alg); +} + +static inline void *crypto_scomp_alloc_ctx(struct crypto_scomp *tfm) +{ + return crypto_scomp_alg(tfm)->alloc_ctx(tfm); +} + +static inline void crypto_scomp_free_ctx(struct crypto_scomp *tfm, + void *ctx) +{ + return crypto_scomp_alg(tfm)->free_ctx(tfm, ctx); +} + +static inline int crypto_scomp_compress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, void *ctx) +{ + return crypto_scomp_alg(tfm)->compress(tfm, src, slen, dst, dlen, ctx); +} + +static inline int crypto_scomp_decompress(struct crypto_scomp *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + return crypto_scomp_alg(tfm)->decompress(tfm, src, slen, dst, dlen, + ctx); +} + +int crypto_init_scomp_ops_async(struct crypto_tfm *tfm); +struct acomp_req *crypto_acomp_scomp_alloc_ctx(struct acomp_req *req); +void crypto_acomp_scomp_free_ctx(struct acomp_req *req); + +/** + * crypto_register_scomp() -- Register synchronous compression algorithm + * + * Function registers an implementation of a synchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_register_scomp(struct scomp_alg *alg); + +/** + * crypto_unregister_scomp() -- Unregister 
synchronous compression algorithm + * + * Function unregisters an implementation of a synchronous + * compression algorithm + * + * @alg: algorithm definition + * + * Return: zero on success; error code in case of error + */ +int crypto_unregister_scomp(struct scomp_alg *alg); + +#endif diff --git a/include/crypto/internal/simd.h b/include/crypto/internal/simd.h new file mode 100644 index 0000000..4295099 --- /dev/null +++ b/include/crypto/internal/simd.h @@ -0,0 +1,17 @@ +/* + * Shared crypto simd helpers + */ + +#ifndef _CRYPTO_INTERNAL_SIMD_H +#define _CRYPTO_INTERNAL_SIMD_H + +struct simd_skcipher_alg; + +struct simd_skcipher_alg *simd_skcipher_create_compat(const char *algname, + const char *drvname, + const char *basename); +struct simd_skcipher_alg *simd_skcipher_create(const char *algname, + const char *basename); +void simd_skcipher_free(struct simd_skcipher_alg *alg); + +#endif /* _CRYPTO_INTERNAL_SIMD_H */ diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index a21a95e..8735979 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -15,8 +15,10 @@ #include <crypto/algapi.h> #include <crypto/skcipher.h> +#include <linux/list.h> #include <linux/types.h> +struct aead_request; struct rtattr; struct skcipher_instance { @@ -34,6 +36,40 @@ struct crypto_skcipher_spawn { struct crypto_spawn base; }; +struct skcipher_walk { + union { + struct { + struct page *page; + unsigned long offset; + } phys; + + struct { + u8 *page; + void *addr; + } virt; + } src, dst; + + struct scatter_walk in; + unsigned int nbytes; + + struct scatter_walk out; + unsigned int total; + + struct list_head buffers; + + u8 *page; + u8 *buffer; + u8 *oiv; + void *iv; + + unsigned int ivsize; + + int flags; + unsigned int blocksize; + unsigned int chunksize; + unsigned int alignmask; +}; + extern const struct crypto_type crypto_givcipher_type; static inline struct crypto_instance *skcipher_crypto_instance( @@ -68,14 +104,6 @@ static inline void crypto_set_skcipher_spawn( int crypto_grab_skcipher(struct crypto_skcipher_spawn *spawn, const char *name, u32 type, u32 mask); -static inline int crypto_grab_skcipher2(struct crypto_skcipher_spawn *spawn, - const char *name, u32 type, u32 mask) -{ - return crypto_grab_skcipher(spawn, name, type, mask); -} - -struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask); - static inline void crypto_drop_skcipher(struct crypto_skcipher_spawn *spawn) { crypto_drop_spawn(&spawn->base); @@ -99,12 +127,6 @@ static inline struct crypto_skcipher *crypto_spawn_skcipher( return crypto_spawn_tfm2(&spawn->base); } -static inline struct crypto_skcipher *crypto_spawn_skcipher2( - struct crypto_skcipher_spawn *spawn) -{ - return crypto_spawn_skcipher(spawn); -} - static inline void crypto_skcipher_set_reqsize( struct crypto_skcipher *skcipher, unsigned int reqsize) { @@ -118,6 +140,21 @@ void crypto_unregister_skciphers(struct skcipher_alg *algs, int count); int skcipher_register_instance(struct crypto_template *tmpl, struct skcipher_instance *inst); +int skcipher_walk_done(struct skcipher_walk *walk, int err); +int skcipher_walk_virt(struct skcipher_walk *walk, + struct skcipher_request *req, + bool atomic); +void skcipher_walk_atomise(struct skcipher_walk *walk); +int skcipher_walk_async(struct skcipher_walk *walk, + struct skcipher_request *req); +int skcipher_walk_aead(struct skcipher_walk *walk, struct aead_request *req, + bool atomic); +int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, 
+ struct aead_request *req, bool atomic); +int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); +void skcipher_walk_complete(struct skcipher_walk *walk, int err); + static inline void ablkcipher_request_complete(struct ablkcipher_request *req, int err) { diff --git a/include/crypto/xts.h b/include/crypto/xts.h index ede6b97..77b6306 100644 --- a/include/crypto/xts.h +++ b/include/crypto/xts.h @@ -2,8 +2,7 @@ #define _CRYPTO_XTS_H #include <crypto/b128ops.h> -#include <linux/crypto.h> -#include <crypto/algapi.h> +#include <crypto/internal/skcipher.h> #include <linux/fips.h> struct scatterlist; @@ -51,4 +50,27 @@ static inline int xts_check_key(struct crypto_tfm *tfm, return 0; } +static inline int xts_verify_key(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + /* + * key consists of keys of equal size concatenated, therefore + * the length must be even. + */ + if (keylen % 2) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + /* ensure that the AES and tweak key are not identical */ + if ((fips_enabled || crypto_skcipher_get_flags(tfm) & + CRYPTO_TFM_REQ_WEAK_KEY) && + !crypto_memneq(key, key + (keylen / 2), keylen / 2)) { + crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY); + return -EINVAL; + } + + return 0; +} + #endif /* _CRYPTO_XTS_H */ diff --git a/include/linux/ccp.h b/include/linux/ccp.h index a765333..c71dd8f 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -11,8 +11,8 @@ * published by the Free Software Foundation. */ -#ifndef __CPP_H__ -#define __CPP_H__ +#ifndef __CCP_H__ +#define __CCP_H__ #include <linux/scatterlist.h> #include <linux/workqueue.h> @@ -553,7 +553,7 @@ enum ccp_engine { #define CCP_CMD_PASSTHRU_NO_DMA_MAP 0x00000002 /** - * struct ccp_cmd - CPP operation request + * struct ccp_cmd - CCP operation request * @entry: list element (ccp driver use only) * @work: work element used for callbacks (ccp driver use only) * @ccp: CCP device to be run on (ccp driver use only) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 7cee555..167aea2 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -50,6 +50,8 @@ #define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 #define CRYPTO_ALG_TYPE_KPP 0x00000008 +#define CRYPTO_ALG_TYPE_ACOMPRESS 0x0000000a +#define CRYPTO_ALG_TYPE_SCOMPRESS 0x0000000b #define CRYPTO_ALG_TYPE_RNG 0x0000000c #define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d #define CRYPTO_ALG_TYPE_DIGEST 0x0000000e @@ -60,6 +62,7 @@ #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c +#define CRYPTO_ALG_TYPE_ACOMPRESS_MASK 0x0000000e #define CRYPTO_ALG_LARVAL 0x00000010 #define CRYPTO_ALG_DEAD 0x00000020 @@ -87,7 +90,7 @@ #define CRYPTO_ALG_TESTED 0x00000400 /* - * Set if the algorithm is an instance that is build from templates. + * Set if the algorithm is an instance that is built from templates. */ #define CRYPTO_ALG_INSTANCE 0x00000800 diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 34a0dc1..bee0827 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -30,8 +30,7 @@ * Must not be NULL. *OBSOLETE* * @read: New API. drivers can fill up to max bytes of data * into the buffer. The buffer is aligned for any type - * and max is guaranteed to be >= to that alignment - * (either 4 or 8 depending on architecture). 
+ * and max is a multiple of 4 and >= 32 bytes. * @priv: Private data, for use by the RNG driver. * @quality: Estimation of true entropy in RNG's bitstream * (per mill). diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h index 79b5ded..11d21fc 100644 --- a/include/uapi/linux/cryptouser.h +++ b/include/uapi/linux/cryptouser.h @@ -46,6 +46,7 @@ enum crypto_attr_type_t { CRYPTOCFGA_REPORT_CIPHER, /* struct crypto_report_cipher */ CRYPTOCFGA_REPORT_AKCIPHER, /* struct crypto_report_akcipher */ CRYPTOCFGA_REPORT_KPP, /* struct crypto_report_kpp */ + CRYPTOCFGA_REPORT_ACOMP, /* struct crypto_report_acomp */ __CRYPTOCFGA_MAX #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1) @@ -112,5 +113,9 @@ struct crypto_report_kpp { char type[CRYPTO_MAX_NAME]; }; +struct crypto_report_acomp { + char type[CRYPTO_MAX_NAME]; +}; + #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \ sizeof(struct crypto_report_blkcipher)) diff --git a/kernel/padata.c b/kernel/padata.c index 7848f05..05316c9 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -64,15 +64,11 @@ static int padata_cpu_hash(struct parallel_data *pd) static void padata_parallel_worker(struct work_struct *parallel_work) { struct padata_parallel_queue *pqueue; - struct parallel_data *pd; - struct padata_instance *pinst; LIST_HEAD(local_list); local_bh_disable(); pqueue = container_of(parallel_work, struct padata_parallel_queue, work); - pd = pqueue->pd; - pinst = pd->pinst; spin_lock(&pqueue->parallel.lock); list_replace_init(&pqueue->parallel.list, &local_list); |
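Usage sketch (editorial addition, not part of this merge): the new acomp interface declared in include/crypto/acompress.h above is driven like the other asynchronous crypto APIs — allocate a tfm, attach a request, set source/destination scatterlists, and wait for the completion callback. The fragment below is a minimal illustration of compressing one linear buffer; the "deflate" algorithm name, the completion-based wait, and the helper names (acomp_cb, compress_buffer) are assumptions for the example only, not code from this series.

    #include <crypto/acompress.h>
    #include <linux/completion.h>
    #include <linux/err.h>
    #include <linux/scatterlist.h>

    struct acomp_result {
    	struct completion completion;
    	int err;
    };

    /* Completion callback; -EINPROGRESS only signals that a backlogged
     * request has moved to the hardware queue, so keep waiting. */
    static void acomp_cb(struct crypto_async_request *req, int err)
    {
    	struct acomp_result *res = req->data;

    	if (err == -EINPROGRESS)
    		return;
    	res->err = err;
    	complete(&res->completion);
    }

    /* Compress a linear input buffer into a caller-provided output buffer.
     * On success *outlen is updated with the number of bytes produced. */
    static int compress_buffer(const void *in, unsigned int inlen,
    			   void *out, unsigned int *outlen)
    {
    	struct crypto_acomp *tfm;
    	struct acomp_req *req;
    	struct scatterlist src, dst;
    	struct acomp_result res;
    	int err;

    	tfm = crypto_alloc_acomp("deflate", 0, 0);
    	if (IS_ERR(tfm))
    		return PTR_ERR(tfm);

    	req = acomp_request_alloc(tfm);
    	if (!req) {
    		err = -ENOMEM;
    		goto out_free_tfm;
    	}

    	sg_init_one(&src, in, inlen);
    	sg_init_one(&dst, out, *outlen);

    	init_completion(&res.completion);
    	acomp_request_set_params(req, &src, &dst, inlen, *outlen);
    	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
    				   acomp_cb, &res);

    	err = crypto_acomp_compress(req);
    	if (err == -EINPROGRESS || err == -EBUSY) {
    		wait_for_completion(&res.completion);
    		err = res.err;
    	}
    	if (!err)
    		*outlen = req->dlen;	/* bytes produced, per acomp_req */

    	acomp_request_free(req);
    out_free_tfm:
    	crypto_free_acomp(tfm);
    	return err;
    }

The completion-based wait is used here because crypto_acomp_compress() may return -EINPROGRESS or -EBUSY for an asynchronous implementation; passing NULL as dst and relying on CRYPTO_ACOMP_ALLOC_OUTPUT (with acomp_request_free() releasing the allocated destination) is an alternative left out of this sketch.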