diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-22 21:04:48 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-22 21:04:48 -0700 |
commit | 44d21c3f3a2ef2f58b18bda64c52c99e723f3f4a (patch) | |
tree | 5146cf96cb0dbd7121176d484417ab942c92dcd4 /drivers/crypto | |
parent | efdfce2b7ff3205ba0fba10270b92b80bbc6187d (diff) | |
parent | fe55dfdcdfabf160ab0c14617725c57c7a1facfc (diff) | |
download | op-kernel-dev-44d21c3f3a2ef2f58b18bda64c52c99e723f3f4a.zip op-kernel-dev-44d21c3f3a2ef2f58b18bda64c52c99e723f3f4a.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 4.2:
API:
- Convert RNG interface to new style.
- New AEAD interface with one SG list for AD and plain/cipher text.
All external AEAD users have been converted.
- New asymmetric key interface (akcipher).
Algorithms:
- Chacha20, Poly1305 and RFC7539 support.
- New RSA implementation.
- Jitter RNG.
- DRBG is now seeded with both /dev/random and Jitter RNG. If kernel
pool isn't ready then DRBG will be reseeded when it is.
- DRBG is now the default crypto API RNG, replacing krng.
- 842 compression (previously part of powerpc nx driver).
Drivers:
- Accelerated SHA-512 for arm64.
- New Marvell CESA driver that supports DMA and more algorithms.
- Updated powerpc nx 842 support.
- Added support for SEC1 hardware to talitos"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (292 commits)
crypto: marvell/cesa - remove COMPILE_TEST dependency
crypto: algif_aead - Temporarily disable all AEAD algorithms
crypto: af_alg - Forbid the use internal algorithms
crypto: echainiv - Only hold RNG during initialisation
crypto: seqiv - Add compatibility support without RNG
crypto: eseqiv - Offer normal cipher functionality without RNG
crypto: chainiv - Offer normal cipher functionality without RNG
crypto: user - Add CRYPTO_MSG_DELRNG
crypto: user - Move cryptouser.h to uapi
crypto: rng - Do not free default RNG when it becomes unused
crypto: skcipher - Allow givencrypt to be NULL
crypto: sahara - propagate the error on clk_disable_unprepare() failure
crypto: rsa - fix invalid select for AKCIPHER
crypto: picoxcell - Update to the current clk API
crypto: nx - Check for bogus firmware properties
crypto: marvell/cesa - add DT bindings documentation
crypto: marvell/cesa - add support for Kirkwood and Dove SoCs
crypto: marvell/cesa - add support for Orion SoCs
crypto: marvell/cesa - add allhwsupport module parameter
crypto: marvell/cesa - add support for all armada SoCs
...
Diffstat (limited to 'drivers/crypto')
55 files changed, 8943 insertions, 3407 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 033c0c8..4044125 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -162,10 +162,10 @@ config CRYPTO_GHASH_S390 config CRYPTO_DEV_MV_CESA tristate "Marvell's Cryptographic Engine" depends on PLAT_ORION - select CRYPTO_ALGAPI select CRYPTO_AES - select CRYPTO_BLKCIPHER2 + select CRYPTO_BLKCIPHER select CRYPTO_HASH + select SRAM help This driver allows you to utilize the Cryptographic Engines and Security Accelerator (CESA) which can be found on the Marvell Orion @@ -173,10 +173,27 @@ config CRYPTO_DEV_MV_CESA Currently the driver supports AES in ECB and CBC mode without DMA. +config CRYPTO_DEV_MARVELL_CESA + tristate "New Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_AES + select CRYPTO_DES + select CRYPTO_BLKCIPHER + select CRYPTO_HASH + select SRAM + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on the Armada 370. + This driver supports CPU offload through DMA transfers. + + This driver is aimed at replacing the mv_cesa driver. This will only + happen once it has received proper testing. + config CRYPTO_DEV_NIAGARA2 tristate "Niagara2 Stream Processing Unit driver" select CRYPTO_DES - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER + select CRYPTO_HASH depends on SPARC64 help Each core of a Niagara2 processor contains a Stream @@ -189,7 +206,6 @@ config CRYPTO_DEV_NIAGARA2 config CRYPTO_DEV_HIFN_795X tristate "Driver HIFN 795x crypto accelerator chips" select CRYPTO_DES - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG depends on PCI @@ -208,8 +224,10 @@ source drivers/crypto/caam/Kconfig config CRYPTO_DEV_TALITOS tristate "Talitos Freescale Security Engine (SEC)" - select CRYPTO_ALGAPI + select CRYPTO_AEAD select CRYPTO_AUTHENC + select CRYPTO_BLKCIPHER + select CRYPTO_HASH select HW_RANDOM depends on FSL_SOC help @@ -222,11 +240,29 @@ config CRYPTO_DEV_TALITOS To compile this driver as a module, choose M here: the module will be called talitos. +config CRYPTO_DEV_TALITOS1 + bool "SEC1 (SEC 1.0 and SEC Lite 1.2)" + depends on CRYPTO_DEV_TALITOS + depends on PPC_8xx || PPC_82xx + default y + help + Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0 + found on MPC82xx or the Freescale Security Engine (SEC Lite) + version 1.2 found on MPC8xx + +config CRYPTO_DEV_TALITOS2 + bool "SEC2+ (SEC version 2.0 or upper)" + depends on CRYPTO_DEV_TALITOS + default y if !PPC_8xx + help + Say 'Y' here to use the Freescale Security Engine (SEC) + version 2 and following as found on MPC83xx, MPC85xx, etc ... + config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE select CRYPTO_DES - select CRYPTO_ALGAPI + select CRYPTO_AEAD select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER help @@ -236,7 +272,6 @@ config CRYPTO_DEV_PPC4XX tristate "Driver AMCC PPC4xx crypto accelerator" depends on PPC && 4xx select CRYPTO_HASH - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help This option allows you to have support for AMCC crypto acceleration. @@ -257,7 +292,7 @@ config CRYPTO_DEV_OMAP_AES tristate "Support for OMAP AES hw engine" depends on ARCH_OMAP2 || ARCH_OMAP3 || ARCH_OMAP2PLUS select CRYPTO_AES - select CRYPTO_BLKCIPHER2 + select CRYPTO_BLKCIPHER help OMAP processors have AES module accelerator. Select this if you want to use the OMAP module for AES algorithms. @@ -266,7 +301,7 @@ config CRYPTO_DEV_OMAP_DES tristate "Support for OMAP DES3DES hw engine" depends on ARCH_OMAP2PLUS select CRYPTO_DES - select CRYPTO_BLKCIPHER2 + select CRYPTO_BLKCIPHER help OMAP processors have DES/3DES module accelerator. Select this if you want to use the OMAP module for DES and 3DES algorithms. Currently @@ -276,9 +311,10 @@ config CRYPTO_DEV_OMAP_DES config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" depends on ARCH_PICOXCELL && HAVE_CLK + select CRYPTO_AEAD select CRYPTO_AES select CRYPTO_AUTHENC - select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_DES select CRYPTO_CBC select CRYPTO_ECB @@ -304,7 +340,6 @@ config CRYPTO_DEV_S5P tristate "Support for Samsung S5PV210/Exynos crypto accelerator" depends on ARCH_S5PV210 || ARCH_EXYNOS select CRYPTO_AES - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help This option allows you to have support for S5P crypto acceleration. @@ -312,11 +347,13 @@ config CRYPTO_DEV_S5P algorithms execution. config CRYPTO_DEV_NX - bool "Support for IBM Power7+ in-Nest cryptographic acceleration" - depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN - default n + bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration" + depends on PPC64 help - Support for Power7+ in-Nest cryptographic acceleration. + This enables support for the NX hardware cryptographic accelerator + coprocessor that is in IBM PowerPC P7+ or later processors. This + does not actually enable any drivers, it only allows you to select + which acceleration type (encryption and/or compression) to enable. if CRYPTO_DEV_NX source "drivers/crypto/nx/Kconfig" @@ -325,7 +362,6 @@ endif config CRYPTO_DEV_UX500 tristate "Driver for ST-Ericsson UX500 crypto hardware acceleration" depends on ARCH_U8500 - select CRYPTO_ALGAPI help Driver for ST-Ericsson UX500 crypto engine. @@ -343,10 +379,7 @@ config CRYPTO_DEV_BFIN_CRC config CRYPTO_DEV_ATMEL_AES tristate "Support for Atmel AES hw accelerator" depends on ARCH_AT91 - select CRYPTO_CBC - select CRYPTO_ECB select CRYPTO_AES - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER select AT_HDMAC help @@ -361,9 +394,6 @@ config CRYPTO_DEV_ATMEL_TDES tristate "Support for Atmel DES/TDES hw accelerator" depends on ARCH_AT91 select CRYPTO_DES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help Some Atmel processors have DES/TDES hw accelerator. @@ -376,10 +406,7 @@ config CRYPTO_DEV_ATMEL_TDES config CRYPTO_DEV_ATMEL_SHA tristate "Support for Atmel SHA hw accelerator" depends on ARCH_AT91 - select CRYPTO_SHA1 - select CRYPTO_SHA256 - select CRYPTO_SHA512 - select CRYPTO_ALGAPI + select CRYPTO_HASH help Some Atmel processors have SHA1/SHA224/SHA256/SHA384/SHA512 hw accelerator. @@ -392,7 +419,6 @@ config CRYPTO_DEV_ATMEL_SHA config CRYPTO_DEV_CCP bool "Support for AMD Cryptographic Coprocessor" depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM - default n help The AMD Cryptographic Coprocessor provides hardware support for encryption, hashing and related operations. @@ -404,13 +430,11 @@ endif config CRYPTO_DEV_MXS_DCP tristate "Support for Freescale MXS DCP" depends on ARCH_MXS - select CRYPTO_SHA1 - select CRYPTO_SHA256 select CRYPTO_CBC select CRYPTO_ECB select CRYPTO_AES select CRYPTO_BLKCIPHER - select CRYPTO_ALGAPI + select CRYPTO_HASH help The Freescale i.MX23/i.MX28 has SHA1/SHA256 and AES128 CBC/ECB co-processor on the die. @@ -429,7 +453,6 @@ config CRYPTO_DEV_QCE select CRYPTO_CBC select CRYPTO_XTS select CRYPTO_CTR - select CRYPTO_ALGAPI select CRYPTO_BLKCIPHER help This driver supports Qualcomm crypto engine accelerator @@ -439,7 +462,6 @@ config CRYPTO_DEV_QCE config CRYPTO_DEV_VMX bool "Support for VMX cryptographic acceleration instructions" depends on PPC64 - default n help Support for VMX cryptographic acceleration instructions. @@ -449,7 +471,6 @@ config CRYPTO_DEV_IMGTEC_HASH tristate "Imagination Technologies hardware hash accelerator" depends on MIPS || COMPILE_TEST depends on HAS_DMA - select CRYPTO_ALGAPI select CRYPTO_MD5 select CRYPTO_SHA1 select CRYPTO_SHA256 diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index fb84be7..e35c07a 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index e7555ff..e286e28 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -45,7 +45,6 @@ config CRYPTO_DEV_FSL_CAAM_RINGSIZE config CRYPTO_DEV_FSL_CAAM_INTC bool "Job Ring interrupt coalescing" depends on CRYPTO_DEV_FSL_CAAM_JR - default n help Enable the Job Ring's interrupt coalescing feature. @@ -77,8 +76,9 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API tristate "Register algorithm implementations with the Crypto API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR default y - select CRYPTO_ALGAPI + select CRYPTO_AEAD select CRYPTO_AUTHENC + select CRYPTO_BLKCIPHER help Selecting this will offload crypto for users of the scatterlist crypto API (such as the linux native IPSec @@ -115,7 +115,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM - default n help Selecting this will enable printing of various debug information in the CAAM driver. diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 29071a1..daca933 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -65,6 +65,10 @@ /* max IV is max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ #define CAAM_MAX_IV_LENGTH 16 +#define AEAD_DESC_JOB_IO_LEN (DESC_JOB_IO_LEN + CAAM_CMD_SZ * 2) +#define GCM_DESC_JOB_IO_LEN (AEAD_DESC_JOB_IO_LEN + \ + CAAM_CMD_SZ * 4) + /* length of descriptors text */ #define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) #define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) @@ -79,18 +83,16 @@ #define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 17 * CAAM_CMD_SZ) #define DESC_GCM_BASE (3 * CAAM_CMD_SZ) -#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 23 * CAAM_CMD_SZ) -#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 19 * CAAM_CMD_SZ) +#define DESC_GCM_ENC_LEN (DESC_GCM_BASE + 16 * CAAM_CMD_SZ) +#define DESC_GCM_DEC_LEN (DESC_GCM_BASE + 12 * CAAM_CMD_SZ) #define DESC_RFC4106_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 15 * CAAM_CMD_SZ) -#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 14 * CAAM_CMD_SZ) -#define DESC_RFC4106_GIVENC_LEN (DESC_RFC4106_BASE + 21 * CAAM_CMD_SZ) +#define DESC_RFC4106_ENC_LEN (DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ) +#define DESC_RFC4106_DEC_LEN (DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ) #define DESC_RFC4543_BASE (3 * CAAM_CMD_SZ) -#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 25 * CAAM_CMD_SZ) -#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 27 * CAAM_CMD_SZ) -#define DESC_RFC4543_GIVENC_LEN (DESC_RFC4543_BASE + 30 * CAAM_CMD_SZ) +#define DESC_RFC4543_ENC_LEN (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ) +#define DESC_RFC4543_DEC_LEN (DESC_RFC4543_BASE + 12 * CAAM_CMD_SZ) #define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) #define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ @@ -98,8 +100,7 @@ #define DESC_ABLKCIPHER_DEC_LEN (DESC_ABLKCIPHER_BASE + \ 15 * CAAM_CMD_SZ) -#define DESC_MAX_USED_BYTES (DESC_RFC4543_GIVENC_LEN + \ - CAAM_MAX_KEY_SIZE) +#define DESC_MAX_USED_BYTES (CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) #define DESC_MAX_USED_LEN (DESC_MAX_USED_BYTES / CAAM_CMD_SZ) #ifdef DEBUG @@ -258,7 +259,7 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, static int aead_null_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -273,7 +274,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; - /* aead_encrypt shared descriptor */ + /* old_aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; init_sh_desc(desc, HDR_SHARE_SERIAL); @@ -362,7 +363,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) desc = ctx->sh_desc_dec; - /* aead_decrypt shared descriptor */ + /* old_aead_decrypt shared descriptor */ init_sh_desc(desc, HDR_SHARE_SERIAL); /* Skip if already shared */ @@ -383,7 +384,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); @@ -449,7 +450,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) static int aead_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct crypto_tfm *ctfm = crypto_aead_tfm(aead); const char *alg_name = crypto_tfm_alg_name(ctfm); @@ -496,7 +497,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) CAAM_DESC_BYTES_MAX) keys_fit_inline = true; - /* aead_encrypt shared descriptor */ + /* old_aead_encrypt shared descriptor */ desc = ctx->sh_desc_enc; /* Note: Context registers are saved. */ @@ -510,7 +511,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); @@ -518,7 +519,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* read assoc before reading payload */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); - aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off); + aead_append_ld_iv(desc, ivsize, ctx1_iv_off); /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) @@ -565,7 +566,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) CAAM_DESC_BYTES_MAX) keys_fit_inline = true; - /* aead_decrypt shared descriptor */ + /* old_aead_decrypt shared descriptor */ desc = ctx->sh_desc_dec; /* Note: Context registers are saved. */ @@ -577,7 +578,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); @@ -586,7 +587,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); - aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off); + aead_append_ld_iv(desc, ivsize, ctx1_iv_off); /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) @@ -645,20 +646,20 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Generate IV */ geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); /* Copy IV to class 1 context */ append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Return to encryption */ append_operation(desc, ctx->class2_alg_type | @@ -676,10 +677,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Copy iv from outfifo to class 2 fifo */ moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 | - NFIFOENTRY_DTYPE_MSG | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_load_imm_u32(desc, tfm->ivsize, LDST_CLASS_2_CCB | + append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); /* Load Counter into CONTEXT1 reg */ @@ -698,7 +699,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); /* Not need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP); /* Will read cryptlen */ @@ -738,7 +739,6 @@ static int aead_setauthsize(struct crypto_aead *authenc, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -754,7 +754,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_GCM_ENC_LEN + DESC_JOB_IO_LEN + + if (DESC_GCM_ENC_LEN + GCM_DESC_JOB_IO_LEN + ctx->enckeylen <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; @@ -777,34 +777,34 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + /* if assoclen + cryptlen is ZERO, skip to ICV write */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); - /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); - /* assoclen = (assoclen + cryptlen) - cryptlen */ - append_math_sub(desc, REG1, REG2, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); + + /* cryptlen = seqinlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ); /* if cryptlen is ZERO jump to zero-payload commands */ - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_MATH_Z); - /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); - - /* if assoclen is ZERO, skip reading the assoc data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); /* read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); /* write encrypted data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); @@ -814,31 +814,17 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); /* jump the zero-payload commands */ - append_jump(desc, JUMP_TEST_ALL | 7); + append_jump(desc, JUMP_TEST_ALL | 2); /* zero-payload commands */ set_jump_tgt_here(desc, zero_payload_jump_cmd); - /* if assoclen is ZERO, jump to IV reading - is the only input data */ - append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); - zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); - /* read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1); - /* jump to ICV writing */ - append_jump(desc, JUMP_TEST_ALL | 2); - - /* read IV - is the only input data */ + /* There is no input data */ set_jump_tgt_here(desc, zero_assoc_jump_cmd2); - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | - FIFOLD_TYPE_LAST1); /* write ICV */ append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | @@ -862,7 +848,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) * must all fit into the 64-word Descriptor h/w Buffer */ keys_fit_inline = false; - if (DESC_GCM_DEC_LEN + DESC_JOB_IO_LEN + + if (DESC_GCM_DEC_LEN + GCM_DESC_JOB_IO_LEN + ctx->enckeylen <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; @@ -886,33 +872,30 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ - append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); - - /* assoclen = (assoclen + cryptlen) - cryptlen */ - append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); - append_math_sub(desc, REG1, REG3, REG2, CAAM_CMD_SZ); + /* if assoclen is ZERO, skip reading the assoc data */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); - /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - /* jump to zero-payload command if cryptlen is zero */ - append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); - zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); + /* skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); - append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); - /* if asoclen is ZERO, skip reading assoc data */ - zero_assoc_jump_cmd1 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); /* read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + set_jump_tgt_here(desc, zero_assoc_jump_cmd1); - append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + + /* jump to zero-payload command if cryptlen is zero */ + zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | + JUMP_COND_MATH_Z); + + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); /* store encrypted data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); @@ -921,21 +904,9 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); - /* jump the zero-payload commands */ - append_jump(desc, JUMP_TEST_ALL | 4); - /* zero-payload command */ set_jump_tgt_here(desc, zero_payload_jump_cmd); - /* if assoclen is ZERO, jump to ICV reading */ - append_math_add(desc, VARSEQINLEN, ZERO, REG1, CAAM_CMD_SZ); - zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | - JUMP_COND_MATH_Z); - /* read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - set_jump_tgt_here(desc, zero_assoc_jump_cmd2); - /* read ICV */ append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_ICV | FIFOLD_TYPE_LAST1); @@ -968,13 +939,11 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; - u32 *key_jump_cmd, *move_cmd, *write_iv_cmd; + u32 *key_jump_cmd; u32 *desc; - u32 geniv; if (!ctx->enckeylen || !ctx->authsize) return 0; @@ -984,7 +953,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4106_ENC_LEN + DESC_JOB_IO_LEN + + if (DESC_RFC4106_ENC_LEN + GCM_DESC_JOB_IO_LEN + ctx->enckeylen <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; @@ -1007,29 +976,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); - - /* assoclen = (assoclen + cryptlen) - cryptlen */ - append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); - - /* Read Salt */ - append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), - 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); - /* Read AES-GCM-ESP IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); /* Read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + /* cryptlen = seqoutlen - assoclen */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + /* Will read cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); /* Write encrypted data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); @@ -1083,30 +1044,21 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ - append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); - - /* assoclen = (assoclen + cryptlen) - cryptlen */ - append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); - append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); - - /* Will write cryptlen bytes */ - append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); - /* Read Salt */ - append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), - 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); - /* Read AES-GCM-ESP IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); + /* Skip assoc data */ + append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); /* Read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); + /* Will write cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); + /* Will read cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); /* Store payload data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); @@ -1132,107 +1084,6 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) desc_bytes(desc), 1); #endif - /* - * Job Descriptor and Shared Descriptors - * must all fit into the 64-word Descriptor h/w Buffer - */ - keys_fit_inline = false; - if (DESC_RFC4106_GIVENC_LEN + DESC_JOB_IO_LEN + - ctx->split_key_pad_len + ctx->enckeylen <= - CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* rfc4106_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - move_cmd = append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Copy generated IV to OFIFO */ - write_iv_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_OUTFIFO | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - - /* assoclen = seqinlen - (ivsize + cryptlen) */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); - - /* Will write ivsize + cryptlen */ - append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ); - - /* Read Salt and generated IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | - FIFOLD_TYPE_FLUSH1 | IMMEDIATE | 12); - /* Append Salt */ - append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); - set_move_tgt_here(desc, move_cmd); - set_move_tgt_here(desc, write_iv_cmd); - /* Blank commands. Will be overwritten by generated IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ - - /* No need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1); - - /* Will read cryptlen */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Store generated IV and encrypted data */ - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - - /* Read payload data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "rfc4106 givenc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - return 0; } @@ -1249,14 +1100,12 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; - u32 *key_jump_cmd, *write_iv_cmd, *write_aad_cmd; + u32 *key_jump_cmd; u32 *read_move_cmd, *write_move_cmd; u32 *desc; - u32 geniv; if (!ctx->enckeylen || !ctx->authsize) return 0; @@ -1266,7 +1115,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) * Job Descriptor and Shared Descriptor * must fit into the 64-word Descriptor h/w Buffer */ - if (DESC_RFC4543_ENC_LEN + DESC_JOB_IO_LEN + + if (DESC_RFC4543_ENC_LEN + GCM_DESC_JOB_IO_LEN + ctx->enckeylen <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; @@ -1289,48 +1138,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* Load AES-GMAC ESP IV into Math1 register */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | - LDST_CLASS_DECO | tfm->ivsize); - - /* Wait the DMA transaction to finish */ - append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | - (1 << JUMP_OFFSET_SHIFT)); - - /* Overwrite blank immediate AES-GMAC ESP IV data */ - write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Overwrite blank immediate AAD data */ - write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - - /* assoclen = (seqinlen - ivsize) - cryptlen */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); - - /* Read Salt and AES-GMAC ESP IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); - /* Append Salt */ - append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); - set_move_tgt_here(desc, write_iv_cmd); - /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD); - - /* Will read cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); - - /* Will write cryptlen bytes */ - append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + /* assoclen + cryptlen = seqinlen */ + append_math_sub(desc, REG3, SEQINLEN, REG0, CAAM_CMD_SZ); /* * MOVE_LEN opcode is not available in all SEC HW revisions, @@ -1342,16 +1151,13 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | (0x8 << MOVE_LEN_SHIFT)); - /* Authenticate AES-GMAC ESP IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); - set_move_tgt_here(desc, write_aad_cmd); - /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - /* Read and write cryptlen bytes */ + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); + + /* Read and write assoclen + cryptlen bytes */ aead_append_src_dst(desc, FIFOLD_TYPE_AAD); set_move_tgt_here(desc, read_move_cmd); @@ -1382,7 +1188,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) * must all fit into the 64-word Descriptor h/w Buffer */ keys_fit_inline = false; - if (DESC_RFC4543_DEC_LEN + DESC_JOB_IO_LEN + + if (DESC_RFC4543_DEC_LEN + GCM_DESC_JOB_IO_LEN + ctx->enckeylen <= CAAM_DESC_BYTES_MAX) keys_fit_inline = true; @@ -1405,28 +1211,8 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class1_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - /* Load AES-GMAC ESP IV into Math1 register */ - append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | - LDST_CLASS_DECO | tfm->ivsize); - - /* Wait the DMA transaction to finish */ - append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | - (1 << JUMP_OFFSET_SHIFT)); - - /* assoclen + cryptlen = (seqinlen - ivsize) - icvsize */ - append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, ctx->authsize); - - /* Overwrite blank immediate AES-GMAC ESP IV data */ - write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Overwrite blank immediate AAD data */ - write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* assoclen = (assoclen + cryptlen) - cryptlen */ - append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); - append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); + /* assoclen + cryptlen = seqoutlen */ + append_math_sub(desc, REG3, SEQOUTLEN, REG0, CAAM_CMD_SZ); /* * MOVE_LEN opcode is not available in all SEC HW revisions, @@ -1438,40 +1224,16 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | (0x8 << MOVE_LEN_SHIFT)); - /* Read Salt and AES-GMAC ESP IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); - /* Append Salt */ - append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); - set_move_tgt_here(desc, write_iv_cmd); - /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD); - - /* Will read cryptlen bytes */ - append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + /* Will read assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); - /* Will write cryptlen bytes */ - append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); - - /* Authenticate AES-GMAC ESP IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); - set_move_tgt_here(desc, write_aad_cmd); - /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ + /* Will write assoclen + cryptlen bytes */ + append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ); /* Store payload data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF); - /* In-snoop cryptlen data */ + /* In-snoop assoclen + cryptlen data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | FIFOLDST_VLF | FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST2FLUSH1); @@ -1499,135 +1261,6 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) desc_bytes(desc), 1); #endif - /* - * Job Descriptor and Shared Descriptors - * must all fit into the 64-word Descriptor h/w Buffer - */ - keys_fit_inline = false; - if (DESC_RFC4543_GIVENC_LEN + DESC_JOB_IO_LEN + - ctx->enckeylen <= CAAM_DESC_BYTES_MAX) - keys_fit_inline = true; - - /* rfc4543_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; - - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip key loading if it is loaded due to sharing */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - if (keys_fit_inline) - append_key_as_imm(desc, (void *)ctx->key, ctx->enckeylen, - ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - else - append_key(desc, ctx->key_dma, ctx->enckeylen, - CLASS_1 | KEY_DEST_CLASS_REG); - set_jump_tgt_here(desc, key_jump_cmd); - - /* Generate IV */ - geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | - NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); - append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | - LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move generated IV to Math1 register */ - append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_MATH1 | - (tfm->ivsize << MOVE_LEN_SHIFT)); - append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); - - /* Overwrite blank immediate AES-GMAC IV data */ - write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Overwrite blank immediate AAD data */ - write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Copy generated IV to OFIFO */ - append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_OUTFIFO | - (tfm->ivsize << MOVE_LEN_SHIFT)); - - /* Class 1 operation */ - append_operation(desc, ctx->class1_alg_type | - OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - - /* assoclen = seqinlen - (ivsize + cryptlen) */ - append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); - - /* Will write ivsize + cryptlen */ - append_math_add(desc, VARSEQOUTLEN, REG3, REG0, CAAM_CMD_SZ); - - /* - * MOVE_LEN opcode is not available in all SEC HW revisions, - * thus need to do some magic, i.e. self-patch the descriptor - * buffer. - */ - read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_MATH3 | - (0x6 << MOVE_LEN_SHIFT)); - write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | MOVE_DEST_DESCBUF | - (0x8 << MOVE_LEN_SHIFT)); - - /* Read Salt and AES-GMAC generated IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); - /* Append Salt */ - append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); - set_move_tgt_here(desc, write_iv_cmd); - /* Blank commands. Will be overwritten by AES-GMAC generated IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ - - /* No need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP); - - /* Read assoc data */ - append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | - FIFOLD_TYPE_AAD); - - /* Will read cryptlen */ - append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ); - - /* Authenticate AES-GMAC IV */ - append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); - set_move_tgt_here(desc, write_aad_cmd); - /* Blank commands. Will be overwritten by AES-GMAC IV. */ - append_cmd(desc, 0x00000000); - append_cmd(desc, 0x00000000); - /* End of blank commands */ - - /* Read and write cryptlen bytes */ - aead_append_src_dst(desc, FIFOLD_TYPE_AAD); - - set_move_tgt_here(desc, read_move_cmd); - set_move_tgt_here(desc, write_move_cmd); - append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); - /* Move payload data to OFIFO */ - append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO); - - /* Write ICV */ - append_seq_store(desc, ctx->authsize, LDST_CLASS_1_CCB | - LDST_SRCDST_BYTE_CONTEXT); - - ctx->sh_desc_givenc_dma = dma_map_single(jrdev, desc, - desc_bytes(desc), - DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { - dev_err(jrdev, "unable to map shared descriptor\n"); - return -ENOMEM; - } -#ifdef DEBUG - print_hex_dump(KERN_ERR, - "rfc4543 givenc shdesc@"__stringify(__LINE__)": ", - DUMP_PREFIX_ADDRESS, 16, 4, desc, - desc_bytes(desc), 1); -#endif - return 0; } @@ -2100,7 +1733,7 @@ struct aead_edesc { int sec4_sg_bytes; dma_addr_t sec4_sg_dma; struct sec4_sg_entry *sec4_sg; - u32 hw_desc[0]; + u32 hw_desc[]; }; /* @@ -2154,6 +1787,16 @@ static void aead_unmap(struct device *dev, struct aead_edesc *edesc, struct aead_request *req) { + caam_unmap(dev, req->src, req->dst, + edesc->src_nents, edesc->src_chained, edesc->dst_nents, + edesc->dst_chained, 0, 0, + edesc->sec4_sg_dma, edesc->sec4_sg_bytes); +} + +static void old_aead_unmap(struct device *dev, + struct aead_edesc *edesc, + struct aead_request *req) +{ struct crypto_aead *aead = crypto_aead_reqtfm(req); int ivsize = crypto_aead_ivsize(aead); @@ -2184,6 +1827,28 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, { struct aead_request *req = context; struct aead_edesc *edesc; + +#ifdef DEBUG + dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); +#endif + + edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + + if (err) + caam_jr_strstatus(jrdev, err); + + aead_unmap(jrdev, edesc, req); + + kfree(edesc); + + aead_request_complete(req, err); +} + +static void old_aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + struct aead_request *req = context; + struct aead_edesc *edesc; #ifdef DEBUG struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -2198,7 +1863,7 @@ static void aead_encrypt_done(struct device *jrdev, u32 *desc, u32 err, if (err) caam_jr_strstatus(jrdev, err); - aead_unmap(jrdev, edesc, req); + old_aead_unmap(jrdev, edesc, req); #ifdef DEBUG print_hex_dump(KERN_ERR, "assoc @"__stringify(__LINE__)": ", @@ -2223,6 +1888,34 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, { struct aead_request *req = context; struct aead_edesc *edesc; + +#ifdef DEBUG + dev_err(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err); +#endif + + edesc = container_of(desc, struct aead_edesc, hw_desc[0]); + + if (err) + caam_jr_strstatus(jrdev, err); + + aead_unmap(jrdev, edesc, req); + + /* + * verify hw auth check passed else return -EBADMSG + */ + if ((err & JRSTA_CCBERR_ERRID_MASK) == JRSTA_CCBERR_ERRID_ICVCHK) + err = -EBADMSG; + + kfree(edesc); + + aead_request_complete(req, err); +} + +static void old_aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, + void *context) +{ + struct aead_request *req = context; + struct aead_edesc *edesc; #ifdef DEBUG struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -2246,7 +1939,7 @@ static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err, if (err) caam_jr_strstatus(jrdev, err); - aead_unmap(jrdev, edesc, req); + old_aead_unmap(jrdev, edesc, req); /* * verify hw auth check passed else return -EBADMSG @@ -2342,10 +2035,10 @@ static void ablkcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err, /* * Fill in aead job descriptor */ -static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, - struct aead_edesc *edesc, - struct aead_request *req, - bool all_contig, bool encrypt) +static void old_init_aead_job(u32 *sh_desc, dma_addr_t ptr, + struct aead_edesc *edesc, + struct aead_request *req, + bool all_contig, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -2425,6 +2118,97 @@ static void init_aead_job(u32 *sh_desc, dma_addr_t ptr, } /* + * Fill in aead job descriptor + */ +static void init_aead_job(struct aead_request *req, + struct aead_edesc *edesc, + bool all_contig, bool encrypt) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + int authsize = ctx->authsize; + u32 *desc = edesc->hw_desc; + u32 out_options, in_options; + dma_addr_t dst_dma, src_dma; + int len, sec4_sg_index = 0; + dma_addr_t ptr; + u32 *sh_desc; + + sh_desc = encrypt ? ctx->sh_desc_enc : ctx->sh_desc_dec; + ptr = encrypt ? ctx->sh_desc_enc_dma : ctx->sh_desc_dec_dma; + + len = desc_len(sh_desc); + init_job_desc_shared(desc, ptr, len, HDR_SHARE_DEFER | HDR_REVERSE); + + if (all_contig) { + src_dma = sg_dma_address(req->src); + in_options = 0; + } else { + src_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + in_options = LDST_SGF; + } + + append_seq_in_ptr(desc, src_dma, req->assoclen + req->cryptlen, + in_options); + + dst_dma = src_dma; + out_options = in_options; + + if (unlikely(req->src != req->dst)) { + if (!edesc->dst_nents) { + dst_dma = sg_dma_address(req->dst); + } else { + dst_dma = edesc->sec4_sg_dma + + sec4_sg_index * + sizeof(struct sec4_sg_entry); + out_options = LDST_SGF; + } + } + + if (encrypt) + append_seq_out_ptr(desc, dst_dma, + req->assoclen + req->cryptlen + authsize, + out_options); + else + append_seq_out_ptr(desc, dst_dma, + req->assoclen + req->cryptlen - authsize, + out_options); + + /* REG3 = assoclen */ + append_math_add_imm_u32(desc, REG3, ZERO, IMM, req->assoclen); +} + +static void init_gcm_job(struct aead_request *req, + struct aead_edesc *edesc, + bool all_contig, bool encrypt) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + u32 *desc = edesc->hw_desc; + bool generic_gcm = (ivsize == 12); + unsigned int last; + + init_aead_job(req, edesc, all_contig, encrypt); + + /* BUG This should not be specific to generic GCM. */ + last = 0; + if (encrypt && generic_gcm && !(req->assoclen + req->cryptlen)) + last = FIFOLD_TYPE_LAST1; + + /* Read GCM IV */ + append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | + FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | 12 | last); + /* Append Salt */ + if (!generic_gcm) + append_data(desc, ctx->key + ctx->enckeylen, 4); + /* Append IV */ + append_data(desc, req->iv, ivsize); + /* End of blank commands */ +} + +/* * Fill in aead givencrypt job descriptor */ static void init_aead_giv_job(u32 *sh_desc, dma_addr_t ptr, @@ -2608,9 +2392,10 @@ static void init_ablkcipher_giv_job(u32 *sh_desc, dma_addr_t ptr, /* * allocate and map the aead extended descriptor */ -static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, - int desc_bytes, bool *all_contig_ptr, - bool encrypt) +static struct aead_edesc *old_aead_edesc_alloc(struct aead_request *req, + int desc_bytes, + bool *all_contig_ptr, + bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); struct caam_ctx *ctx = crypto_aead_ctx(aead); @@ -2713,10 +2498,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, sec4_sg_index = 0; if (!all_contig) { if (!is_gcm) { - sg_to_sec4_sg(req->assoc, - assoc_nents, - edesc->sec4_sg + - sec4_sg_index, 0); + sg_to_sec4_sg_len(req->assoc, req->assoclen, + edesc->sec4_sg + sec4_sg_index); sec4_sg_index += assoc_nents; } @@ -2725,10 +2508,8 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, sec4_sg_index += 1; if (is_gcm) { - sg_to_sec4_sg(req->assoc, - assoc_nents, - edesc->sec4_sg + - sec4_sg_index, 0); + sg_to_sec4_sg_len(req->assoc, req->assoclen, + edesc->sec4_sg + sec4_sg_index); sec4_sg_index += assoc_nents; } @@ -2752,7 +2533,124 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, return edesc; } -static int aead_encrypt(struct aead_request *req) +/* + * allocate and map the aead extended descriptor + */ +static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, + int desc_bytes, bool *all_contig_ptr, + bool encrypt) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + int src_nents, dst_nents = 0; + struct aead_edesc *edesc; + int sgc; + bool all_contig = true; + bool src_chained = false, dst_chained = false; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + unsigned int authsize = ctx->authsize; + + if (unlikely(req->dst != req->src)) { + src_nents = sg_count(req->src, req->assoclen + req->cryptlen, + &src_chained); + dst_nents = sg_count(req->dst, + req->assoclen + req->cryptlen + + (encrypt ? authsize : (-authsize)), + &dst_chained); + } else { + src_nents = sg_count(req->src, + req->assoclen + req->cryptlen + + (encrypt ? authsize : 0), + &src_chained); + } + + /* Check if data are contiguous. */ + all_contig = !src_nents; + if (!all_contig) { + src_nents = src_nents ? : 1; + sec4_sg_len = src_nents; + } + + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc and hw desc commands, link tables */ + edesc = kzalloc(sizeof(struct aead_edesc) + desc_bytes + + sec4_sg_bytes, GFP_DMA | flags); + if (!edesc) { + dev_err(jrdev, "could not allocate extended descriptor\n"); + return ERR_PTR(-ENOMEM); + } + + if (likely(req->src == req->dst)) { + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_BIDIRECTIONAL, src_chained); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } + } else { + sgc = dma_map_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE, src_chained); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map source\n"); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } + + sgc = dma_map_sg_chained(jrdev, req->dst, dst_nents ? : 1, + DMA_FROM_DEVICE, dst_chained); + if (unlikely(!sgc)) { + dev_err(jrdev, "unable to map destination\n"); + dma_unmap_sg_chained(jrdev, req->src, src_nents ? : 1, + DMA_TO_DEVICE, src_chained); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } + } + + edesc->src_nents = src_nents; + edesc->src_chained = src_chained; + edesc->dst_nents = dst_nents; + edesc->dst_chained = dst_chained; + edesc->sec4_sg = (void *)edesc + sizeof(struct aead_edesc) + + desc_bytes; + *all_contig_ptr = all_contig; + + sec4_sg_index = 0; + if (!all_contig) { + sg_to_sec4_sg_last(req->src, src_nents, + edesc->sec4_sg + sec4_sg_index, 0); + sec4_sg_index += src_nents; + } + if (dst_nents) { + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + } + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, edesc->sec4_sg_dma)) { + dev_err(jrdev, "unable to map S/G table\n"); + aead_unmap(jrdev, edesc, req); + kfree(edesc); + return ERR_PTR(-ENOMEM); + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; +} + +static int gcm_encrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -2763,14 +2661,12 @@ static int aead_encrypt(struct aead_request *req) int ret = 0; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN * - CAAM_CMD_SZ, &all_contig, true); + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, true); if (IS_ERR(edesc)) return PTR_ERR(edesc); /* Create and submit job descriptor */ - init_aead_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req, - all_contig, true); + init_gcm_job(req, edesc, all_contig, true); #ifdef DEBUG print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, @@ -2789,7 +2685,7 @@ static int aead_encrypt(struct aead_request *req) return ret; } -static int aead_decrypt(struct aead_request *req) +static int old_aead_encrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); @@ -2800,8 +2696,80 @@ static int aead_decrypt(struct aead_request *req) int ret = 0; /* allocate extended descriptor */ - edesc = aead_edesc_alloc(req, DESC_JOB_IO_LEN * - CAAM_CMD_SZ, &all_contig, false); + edesc = old_aead_edesc_alloc(req, DESC_JOB_IO_LEN * + CAAM_CMD_SZ, &all_contig, true); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Create and submit job descriptor */ + old_init_aead_job(ctx->sh_desc_enc, ctx->sh_desc_enc_dma, edesc, req, + all_contig, true); +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); +#endif + + desc = edesc->hw_desc; + ret = caam_jr_enqueue(jrdev, desc, old_aead_encrypt_done, req); + if (!ret) { + ret = -EINPROGRESS; + } else { + old_aead_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + +static int gcm_decrypt(struct aead_request *req) +{ + struct aead_edesc *edesc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + bool all_contig; + u32 *desc; + int ret = 0; + + /* allocate extended descriptor */ + edesc = aead_edesc_alloc(req, GCM_DESC_JOB_IO_LEN, &all_contig, false); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Create and submit job descriptor*/ + init_gcm_job(req, edesc, all_contig, false); +#ifdef DEBUG + print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, + desc_bytes(edesc->hw_desc), 1); +#endif + + desc = edesc->hw_desc; + ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); + if (!ret) { + ret = -EINPROGRESS; + } else { + aead_unmap(jrdev, edesc, req); + kfree(edesc); + } + + return ret; +} + +static int old_aead_decrypt(struct aead_request *req) +{ + struct aead_edesc *edesc; + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + bool all_contig; + u32 *desc; + int ret = 0; + + /* allocate extended descriptor */ + edesc = old_aead_edesc_alloc(req, DESC_JOB_IO_LEN * + CAAM_CMD_SZ, &all_contig, false); if (IS_ERR(edesc)) return PTR_ERR(edesc); @@ -2812,8 +2780,8 @@ static int aead_decrypt(struct aead_request *req) #endif /* Create and submit job descriptor*/ - init_aead_job(ctx->sh_desc_dec, - ctx->sh_desc_dec_dma, edesc, req, all_contig, false); + old_init_aead_job(ctx->sh_desc_dec, + ctx->sh_desc_dec_dma, edesc, req, all_contig, false); #ifdef DEBUG print_hex_dump(KERN_ERR, "aead jobdesc@"__stringify(__LINE__)": ", DUMP_PREFIX_ADDRESS, 16, 4, edesc->hw_desc, @@ -2821,11 +2789,11 @@ static int aead_decrypt(struct aead_request *req) #endif desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_decrypt_done, req); + ret = caam_jr_enqueue(jrdev, desc, old_aead_decrypt_done, req); if (!ret) { ret = -EINPROGRESS; } else { - aead_unmap(jrdev, edesc, req); + old_aead_unmap(jrdev, edesc, req); kfree(edesc); } @@ -2953,8 +2921,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request sec4_sg_index = 0; if (!(contig & GIV_SRC_CONTIG)) { if (!is_gcm) { - sg_to_sec4_sg(req->assoc, assoc_nents, - edesc->sec4_sg + sec4_sg_index, 0); + sg_to_sec4_sg_len(req->assoc, req->assoclen, + edesc->sec4_sg + sec4_sg_index); sec4_sg_index += assoc_nents; } @@ -2963,8 +2931,8 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request sec4_sg_index += 1; if (is_gcm) { - sg_to_sec4_sg(req->assoc, assoc_nents, - edesc->sec4_sg + sec4_sg_index, 0); + sg_to_sec4_sg_len(req->assoc, req->assoclen, + edesc->sec4_sg + sec4_sg_index); sec4_sg_index += assoc_nents; } @@ -2999,7 +2967,7 @@ static struct aead_edesc *aead_giv_edesc_alloc(struct aead_givcrypt_request return edesc; } -static int aead_givencrypt(struct aead_givcrypt_request *areq) +static int old_aead_givencrypt(struct aead_givcrypt_request *areq) { struct aead_request *req = &areq->areq; struct aead_edesc *edesc; @@ -3033,11 +3001,11 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq) #endif desc = edesc->hw_desc; - ret = caam_jr_enqueue(jrdev, desc, aead_encrypt_done, req); + ret = caam_jr_enqueue(jrdev, desc, old_aead_encrypt_done, req); if (!ret) { ret = -EINPROGRESS; } else { - aead_unmap(jrdev, edesc, req); + old_aead_unmap(jrdev, edesc, req); kfree(edesc); } @@ -3046,7 +3014,7 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq) static int aead_null_givencrypt(struct aead_givcrypt_request *areq) { - return aead_encrypt(&areq->areq); + return old_aead_encrypt(&areq->areq); } /* @@ -3379,11 +3347,7 @@ struct caam_alg_template { u32 type; union { struct ablkcipher_alg ablkcipher; - struct aead_alg aead; - struct blkcipher_alg blkcipher; - struct cipher_alg cipher; - struct compress_alg compress; - struct rng_alg rng; + struct old_aead_alg aead; } template_u; u32 class1_alg_type; u32 class2_alg_type; @@ -3400,8 +3364,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3419,8 +3383,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3438,8 +3402,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3458,8 +3422,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3478,8 +3442,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3498,8 +3462,8 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, .givencrypt = aead_null_givencrypt, .geniv = "<built-in>", .ivsize = NULL_IV_SIZE, @@ -3518,9 +3482,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, @@ -3537,9 +3501,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, @@ -3556,9 +3520,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, @@ -3576,9 +3540,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, @@ -3596,9 +3560,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, @@ -3617,9 +3581,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = AES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, @@ -3637,9 +3601,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, @@ -3656,9 +3620,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, @@ -3675,9 +3639,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, @@ -3695,9 +3659,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, @@ -3715,9 +3679,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, @@ -3735,9 +3699,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES3_EDE_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, @@ -3755,9 +3719,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = MD5_DIGEST_SIZE, @@ -3774,9 +3738,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, @@ -3793,9 +3757,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, @@ -3813,9 +3777,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, @@ -3833,9 +3797,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, @@ -3853,9 +3817,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = DES_BLOCK_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, @@ -3873,9 +3837,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = MD5_DIGEST_SIZE, @@ -3892,9 +3856,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA1_DIGEST_SIZE, @@ -3911,9 +3875,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA224_DIGEST_SIZE, @@ -3931,9 +3895,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA256_DIGEST_SIZE, @@ -3951,9 +3915,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA384_DIGEST_SIZE, @@ -3971,9 +3935,9 @@ static struct caam_alg_template driver_algs[] = { .template_aead = { .setkey = aead_setkey, .setauthsize = aead_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, + .encrypt = old_aead_encrypt, + .decrypt = old_aead_decrypt, + .givencrypt = old_aead_givencrypt, .geniv = "<built-in>", .ivsize = CTR_RFC3686_IV_SIZE, .maxauthsize = SHA512_DIGEST_SIZE, @@ -3983,58 +3947,6 @@ static struct caam_alg_template driver_algs[] = { OP_ALG_AAI_HMAC_PRECOMP, .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, }, - { - .name = "rfc4106(gcm(aes))", - .driver_name = "rfc4106-gcm-aes-caam", - .blocksize = 1, - .type = CRYPTO_ALG_TYPE_AEAD, - .template_aead = { - .setkey = rfc4106_setkey, - .setauthsize = rfc4106_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "<built-in>", - .ivsize = 8, - .maxauthsize = AES_BLOCK_SIZE, - }, - .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, - }, - { - .name = "rfc4543(gcm(aes))", - .driver_name = "rfc4543-gcm-aes-caam", - .blocksize = 1, - .type = CRYPTO_ALG_TYPE_AEAD, - .template_aead = { - .setkey = rfc4543_setkey, - .setauthsize = rfc4543_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = aead_givencrypt, - .geniv = "<built-in>", - .ivsize = 8, - .maxauthsize = AES_BLOCK_SIZE, - }, - .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, - }, - /* Galois Counter Mode */ - { - .name = "gcm(aes)", - .driver_name = "gcm-aes-caam", - .blocksize = 1, - .type = CRYPTO_ALG_TYPE_AEAD, - .template_aead = { - .setkey = gcm_setkey, - .setauthsize = gcm_setauthsize, - .encrypt = aead_encrypt, - .decrypt = aead_decrypt, - .givencrypt = NULL, - .geniv = "<built-in>", - .ivsize = 12, - .maxauthsize = AES_BLOCK_SIZE, - }, - .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, - }, /* ablkcipher descriptor */ { .name = "cbc(aes)", @@ -4124,21 +4036,84 @@ static struct caam_alg_template driver_algs[] = { } }; -struct caam_crypto_alg { - struct list_head entry; +struct caam_alg_entry { int class1_alg_type; int class2_alg_type; int alg_op; +}; + +struct caam_aead_alg { + struct aead_alg aead; + struct caam_alg_entry caam; + bool registered; +}; + +static struct caam_aead_alg driver_aeads[] = { + { + .aead = { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aes-caam", + .cra_blocksize = 1, + }, + .setkey = rfc4106_setkey, + .setauthsize = rfc4106_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, + .ivsize = 8, + .maxauthsize = AES_BLOCK_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, + }, + }, + { + .aead = { + .base = { + .cra_name = "rfc4543(gcm(aes))", + .cra_driver_name = "rfc4543-gcm-aes-caam", + .cra_blocksize = 1, + }, + .setkey = rfc4543_setkey, + .setauthsize = rfc4543_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, + .ivsize = 8, + .maxauthsize = AES_BLOCK_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, + }, + }, + /* Galois Counter Mode */ + { + .aead = { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-caam", + .cra_blocksize = 1, + }, + .setkey = gcm_setkey, + .setauthsize = gcm_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + }, + .caam = { + .class1_alg_type = OP_ALG_ALGSEL_AES | OP_ALG_AAI_GCM, + }, + }, +}; + +struct caam_crypto_alg { struct crypto_alg crypto_alg; + struct list_head entry; + struct caam_alg_entry caam; }; -static int caam_cra_init(struct crypto_tfm *tfm) +static int caam_init_common(struct caam_ctx *ctx, struct caam_alg_entry *caam) { - struct crypto_alg *alg = tfm->__crt_alg; - struct caam_crypto_alg *caam_alg = - container_of(alg, struct caam_crypto_alg, crypto_alg); - struct caam_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->jrdev = caam_jr_alloc(); if (IS_ERR(ctx->jrdev)) { pr_err("Job Ring Device allocation for transform failed\n"); @@ -4146,17 +4121,35 @@ static int caam_cra_init(struct crypto_tfm *tfm) } /* copy descriptor header template value */ - ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam_alg->class1_alg_type; - ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam_alg->class2_alg_type; - ctx->alg_op = OP_TYPE_CLASS2_ALG | caam_alg->alg_op; + ctx->class1_alg_type = OP_TYPE_CLASS1_ALG | caam->class1_alg_type; + ctx->class2_alg_type = OP_TYPE_CLASS2_ALG | caam->class2_alg_type; + ctx->alg_op = OP_TYPE_CLASS2_ALG | caam->alg_op; return 0; } -static void caam_cra_exit(struct crypto_tfm *tfm) +static int caam_cra_init(struct crypto_tfm *tfm) { + struct crypto_alg *alg = tfm->__crt_alg; + struct caam_crypto_alg *caam_alg = + container_of(alg, struct caam_crypto_alg, crypto_alg); struct caam_ctx *ctx = crypto_tfm_ctx(tfm); + return caam_init_common(ctx, &caam_alg->caam); +} + +static int caam_aead_init(struct crypto_aead *tfm) +{ + struct aead_alg *alg = crypto_aead_alg(tfm); + struct caam_aead_alg *caam_alg = + container_of(alg, struct caam_aead_alg, aead); + struct caam_ctx *ctx = crypto_aead_ctx(tfm); + + return caam_init_common(ctx, &caam_alg->caam); +} + +static void caam_exit_common(struct caam_ctx *ctx) +{ if (ctx->sh_desc_enc_dma && !dma_mapping_error(ctx->jrdev, ctx->sh_desc_enc_dma)) dma_unmap_single(ctx->jrdev, ctx->sh_desc_enc_dma, @@ -4179,10 +4172,28 @@ static void caam_cra_exit(struct crypto_tfm *tfm) caam_jr_free(ctx->jrdev); } +static void caam_cra_exit(struct crypto_tfm *tfm) +{ + caam_exit_common(crypto_tfm_ctx(tfm)); +} + +static void caam_aead_exit(struct crypto_aead *tfm) +{ + caam_exit_common(crypto_aead_ctx(tfm)); +} + static void __exit caam_algapi_exit(void) { struct caam_crypto_alg *t_alg, *n; + int i; + + for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) { + struct caam_aead_alg *t_alg = driver_aeads + i; + + if (t_alg->registered) + crypto_unregister_aead(&t_alg->aead); + } if (!alg_list.next) return; @@ -4235,13 +4246,26 @@ static struct caam_crypto_alg *caam_alg_alloc(struct caam_alg_template break; } - t_alg->class1_alg_type = template->class1_alg_type; - t_alg->class2_alg_type = template->class2_alg_type; - t_alg->alg_op = template->alg_op; + t_alg->caam.class1_alg_type = template->class1_alg_type; + t_alg->caam.class2_alg_type = template->class2_alg_type; + t_alg->caam.alg_op = template->alg_op; return t_alg; } +static void caam_aead_alg_init(struct caam_aead_alg *t_alg) +{ + struct aead_alg *alg = &t_alg->aead; + + alg->base.cra_module = THIS_MODULE; + alg->base.cra_priority = CAAM_CRA_PRIORITY; + alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY; + + alg->init = caam_aead_init; + alg->exit = caam_aead_exit; +} + static int __init caam_algapi_init(void) { struct device_node *dev_node; @@ -4249,6 +4273,7 @@ static int __init caam_algapi_init(void) struct device *ctrldev; void *priv; int i = 0, err = 0; + bool registered = false; dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); if (!dev_node) { @@ -4295,10 +4320,30 @@ static int __init caam_algapi_init(void) pr_warn("%s alg registration failed\n", t_alg->crypto_alg.cra_driver_name); kfree(t_alg); - } else - list_add_tail(&t_alg->entry, &alg_list); + continue; + } + + list_add_tail(&t_alg->entry, &alg_list); + registered = true; } - if (!list_empty(&alg_list)) + + for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) { + struct caam_aead_alg *t_alg = driver_aeads + i; + + caam_aead_alg_init(t_alg); + + err = crypto_register_aead(&t_alg->aead); + if (err) { + pr_warn("%s alg registration failed\n", + t_alg->aead.base.cra_driver_name); + continue; + } + + t_alg->registered = true; + registered = true; + } + + if (registered) pr_info("caam algorithms registered in /proc/crypto\n"); return err; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 332c8ef..dae1e80 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -835,17 +835,17 @@ static int ahash_update_ctx(struct ahash_request *req) src_map_to_sec4_sg(jrdev, req->src, src_nents, edesc->sec4_sg + sec4_sg_src_index, chained); - if (*next_buflen) { + if (*next_buflen) scatterwalk_map_and_copy(next_buf, req->src, to_hash - *buflen, *next_buflen, 0); - state->current_buf = !state->current_buf; - } } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; } + state->current_buf = !state->current_buf; + sh_len = desc_len(sh_desc); desc = edesc->hw_desc; init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | @@ -1268,9 +1268,10 @@ static int ahash_update_no_ctx(struct ahash_request *req) scatterwalk_map_and_copy(next_buf, req->src, to_hash - *buflen, *next_buflen, 0); - state->current_buf = !state->current_buf; } + state->current_buf = !state->current_buf; + sh_len = desc_len(sh_desc); desc = edesc->hw_desc; init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index acd7743..f57f395 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -32,7 +32,7 @@ #include <crypto/des.h> #include <crypto/sha.h> #include <crypto/md5.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/scatterwalk.h> #include <crypto/internal/skcipher.h> diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index efba4cc..efacab7 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -301,7 +301,7 @@ static int caam_remove(struct platform_device *pdev) #endif /* Unmap controller region */ - iounmap(&ctrl); + iounmap(ctrl); return ret; } @@ -496,7 +496,7 @@ static int caam_probe(struct platform_device *pdev) sizeof(struct platform_device *) * rspec, GFP_KERNEL); if (ctrlpriv->jrpdev == NULL) { - iounmap(&ctrl); + iounmap(ctrl); return -ENOMEM; } diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 378ddc1..672c974 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -83,35 +83,35 @@ #endif #endif +/* + * The only users of these wr/rd_reg64 functions is the Job Ring (JR). + * The DMA address registers in the JR are a pair of 32-bit registers. + * The layout is: + * + * base + 0x0000 : most-significant 32 bits + * base + 0x0004 : least-significant 32 bits + * + * The 32-bit version of this core therefore has to write to base + 0x0004 + * to set the 32-bit wide DMA address. This seems to be independent of the + * endianness of the written/read data. + */ + #ifndef CONFIG_64BIT -#ifdef __BIG_ENDIAN -static inline void wr_reg64(u64 __iomem *reg, u64 data) -{ - wr_reg32((u32 __iomem *)reg, (data & 0xffffffff00000000ull) >> 32); - wr_reg32((u32 __iomem *)reg + 1, data & 0x00000000ffffffffull); -} +#define REG64_MS32(reg) ((u32 __iomem *)(reg)) +#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -static inline u64 rd_reg64(u64 __iomem *reg) -{ - return (((u64)rd_reg32((u32 __iomem *)reg)) << 32) | - ((u64)rd_reg32((u32 __iomem *)reg + 1)); -} -#else -#ifdef __LITTLE_ENDIAN static inline void wr_reg64(u64 __iomem *reg, u64 data) { - wr_reg32((u32 __iomem *)reg + 1, (data & 0xffffffff00000000ull) >> 32); - wr_reg32((u32 __iomem *)reg, data & 0x00000000ffffffffull); + wr_reg32(REG64_MS32(reg), data >> 32); + wr_reg32(REG64_LS32(reg), data); } static inline u64 rd_reg64(u64 __iomem *reg) { - return (((u64)rd_reg32((u32 __iomem *)reg + 1)) << 32) | - ((u64)rd_reg32((u32 __iomem *)reg)); + return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | + (u64)rd_reg32(REG64_LS32(reg))); } #endif -#endif -#endif /* * jr_outentry diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 3b91821..b68b74c 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -55,6 +55,21 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; } +static inline struct sec4_sg_entry *sg_to_sec4_sg_len( + struct scatterlist *sg, unsigned int total, + struct sec4_sg_entry *sec4_sg_ptr) +{ + do { + unsigned int len = min(sg_dma_len(sg), total); + + dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), len, 0); + sec4_sg_ptr++; + sg = sg_next(sg); + total -= len; + } while (total); + return sec4_sg_ptr - 1; +} + /* count number of elements in scatterlist */ static inline int __sg_count(struct scatterlist *sg_list, int nbytes, bool *chained) @@ -85,34 +100,41 @@ static inline int sg_count(struct scatterlist *sg_list, int nbytes, return sg_nents; } -static int dma_map_sg_chained(struct device *dev, struct scatterlist *sg, - unsigned int nents, enum dma_data_direction dir, - bool chained) +static inline void dma_unmap_sg_chained( + struct device *dev, struct scatterlist *sg, unsigned int nents, + enum dma_data_direction dir, bool chained) { if (unlikely(chained)) { int i; for (i = 0; i < nents; i++) { - dma_map_sg(dev, sg, 1, dir); + dma_unmap_sg(dev, sg, 1, dir); sg = sg_next(sg); } - } else { - dma_map_sg(dev, sg, nents, dir); + } else if (nents) { + dma_unmap_sg(dev, sg, nents, dir); } - return nents; } -static int dma_unmap_sg_chained(struct device *dev, struct scatterlist *sg, - unsigned int nents, enum dma_data_direction dir, - bool chained) +static inline int dma_map_sg_chained( + struct device *dev, struct scatterlist *sg, unsigned int nents, + enum dma_data_direction dir, bool chained) { + struct scatterlist *first = sg; + if (unlikely(chained)) { int i; for (i = 0; i < nents; i++) { - dma_unmap_sg(dev, sg, 1, dir); + if (!dma_map_sg(dev, sg, 1, dir)) { + dma_unmap_sg_chained(dev, first, i, dir, + chained); + nents = 0; + break; + } + sg = sg_next(sg); } - } else { - dma_unmap_sg(dev, sg, nents, dir); - } + } else + nents = dma_map_sg(dev, sg, nents, dir); + return nents; } diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 7639ffc..ae38f6b 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -13,7 +13,6 @@ config CRYPTO_DEV_CCP_CRYPTO tristate "Encryption and hashing acceleration support" depends on CRYPTO_DEV_CCP_DD default m - select CRYPTO_ALGAPI select CRYPTO_HASH select CRYPTO_BLKCIPHER select CRYPTO_AUTHENC diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 71f2e3c..d09c6c4 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -52,8 +52,7 @@ struct ccp_dm_workarea { struct ccp_sg_workarea { struct scatterlist *sg; - unsigned int nents; - unsigned int length; + int nents; struct scatterlist *dma_sg; struct device *dma_dev; @@ -496,8 +495,10 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, if (!sg) return 0; - wa->nents = sg_nents(sg); - wa->length = sg->length; + wa->nents = sg_nents_for_len(sg, len); + if (wa->nents < 0) + return wa->nents; + wa->bytes_left = len; wa->sg_used = 0; diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index b1c20b2..c0aa5c5 100644 --- a/drivers/crypto/ccp/ccp-platform.c +++ b/drivers/crypto/ccp/ccp-platform.c @@ -174,8 +174,6 @@ static int ccp_platform_probe(struct platform_device *pdev) } ccp->io_regs = ccp->io_map; - if (!dev->dma_mask) - dev->dma_mask = &dev->coherent_dma_mask; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 48f4535..7ba495f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -25,7 +25,7 @@ #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/algapi.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/scatterwalk.h> @@ -575,7 +575,8 @@ static int init_tfm_ablk(struct crypto_tfm *tfm) static int init_tfm_aead(struct crypto_tfm *tfm) { - tfm->crt_aead.reqsize = sizeof(struct aead_ctx); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_ctx)); return init_tfm(tfm); } @@ -1096,7 +1097,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) { struct ixp_ctx *ctx = crypto_aead_ctx(tfm); u32 *flags = &tfm->base.crt_flags; - unsigned digest_len = crypto_aead_alg(tfm)->maxauthsize; + unsigned digest_len = crypto_aead_maxauthsize(tfm); int ret; if (!ctx->enckey_len && !ctx->authkey_len) @@ -1138,7 +1139,7 @@ out: static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - int max = crypto_aead_alg(tfm)->maxauthsize >> 2; + int max = crypto_aead_maxauthsize(tfm) >> 2; if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3)) return -EINVAL; diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile new file mode 100644 index 0000000..0c12b13 --- /dev/null +++ b/drivers/crypto/marvell/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c new file mode 100644 index 0000000..a432633 --- /dev/null +++ b/drivers/crypto/marvell/cesa.c @@ -0,0 +1,548 @@ +/* + * Support for Marvell's Cryptographic Engine and Security Accelerator (CESA) + * that can be found on the following platform: Orion, Kirkwood, Armada. This + * driver supports the TDMA engine on platforms on which it is available. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include <linux/genalloc.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kthread.h> +#include <linux/mbus.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/clk.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_irq.h> + +#include "cesa.h" + +static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); +module_param_named(allhwsupport, allhwsupport, int, 0444); +MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); + +struct mv_cesa_dev *cesa_dev; + +static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req, *backlog; + struct mv_cesa_ctx *ctx; + + spin_lock_bh(&cesa_dev->lock); + backlog = crypto_get_backlog(&cesa_dev->queue); + req = crypto_dequeue_request(&cesa_dev->queue); + engine->req = req; + spin_unlock_bh(&cesa_dev->lock); + + if (!req) + return; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + ctx = crypto_tfm_ctx(req->tfm); + ctx->ops->prepare(req, engine); + ctx->ops->step(req); +} + +static irqreturn_t mv_cesa_int(int irq, void *priv) +{ + struct mv_cesa_engine *engine = priv; + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + u32 status, mask; + irqreturn_t ret = IRQ_NONE; + + while (true) { + int res; + + mask = mv_cesa_get_int_mask(engine); + status = readl(engine->regs + CESA_SA_INT_STATUS); + + if (!(status & mask)) + break; + + /* + * TODO: avoid clearing the FPGA_INT_STATUS if this not + * relevant on some platforms. + */ + writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); + writel(~status, engine->regs + CESA_SA_INT_STATUS); + + ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); + req = engine->req; + spin_unlock_bh(&engine->lock); + if (req) { + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status & mask); + if (res != -EINPROGRESS) { + spin_lock_bh(&engine->lock); + engine->req = NULL; + mv_cesa_dequeue_req_unlocked(engine); + spin_unlock_bh(&engine->lock); + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); + } else { + ctx->ops->step(req); + } + } + } + + return ret; +} + +int mv_cesa_queue_req(struct crypto_async_request *req) +{ + int ret; + int i; + + spin_lock_bh(&cesa_dev->lock); + ret = crypto_enqueue_request(&cesa_dev->queue, req); + spin_unlock_bh(&cesa_dev->lock); + + if (ret != -EINPROGRESS) + return ret; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + spin_lock_bh(&cesa_dev->engines[i].lock); + if (!cesa_dev->engines[i].req) + mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); + spin_unlock_bh(&cesa_dev->engines[i].lock); + } + + return -EINPROGRESS; +} + +static int mv_cesa_add_algs(struct mv_cesa_dev *cesa) +{ + int ret; + int i, j; + + for (i = 0; i < cesa->caps->ncipher_algs; i++) { + ret = crypto_register_alg(cesa->caps->cipher_algs[i]); + if (ret) + goto err_unregister_crypto; + } + + for (i = 0; i < cesa->caps->nahash_algs; i++) { + ret = crypto_register_ahash(cesa->caps->ahash_algs[i]); + if (ret) + goto err_unregister_ahash; + } + + return 0; + +err_unregister_ahash: + for (j = 0; j < i; j++) + crypto_unregister_ahash(cesa->caps->ahash_algs[j]); + i = cesa->caps->ncipher_algs; + +err_unregister_crypto: + for (j = 0; j < i; j++) + crypto_unregister_alg(cesa->caps->cipher_algs[j]); + + return ret; +} + +static void mv_cesa_remove_algs(struct mv_cesa_dev *cesa) +{ + int i; + + for (i = 0; i < cesa->caps->nahash_algs; i++) + crypto_unregister_ahash(cesa->caps->ahash_algs[i]); + + for (i = 0; i < cesa->caps->ncipher_algs; i++) + crypto_unregister_alg(cesa->caps->cipher_algs[i]); +} + +static struct crypto_alg *orion_cipher_algs[] = { + &mv_cesa_ecb_des_alg, + &mv_cesa_cbc_des_alg, + &mv_cesa_ecb_des3_ede_alg, + &mv_cesa_cbc_des3_ede_alg, + &mv_cesa_ecb_aes_alg, + &mv_cesa_cbc_aes_alg, +}; + +static struct ahash_alg *orion_ahash_algs[] = { + &mv_md5_alg, + &mv_sha1_alg, + &mv_ahmac_md5_alg, + &mv_ahmac_sha1_alg, +}; + +static struct crypto_alg *armada_370_cipher_algs[] = { + &mv_cesa_ecb_des_alg, + &mv_cesa_cbc_des_alg, + &mv_cesa_ecb_des3_ede_alg, + &mv_cesa_cbc_des3_ede_alg, + &mv_cesa_ecb_aes_alg, + &mv_cesa_cbc_aes_alg, +}; + +static struct ahash_alg *armada_370_ahash_algs[] = { + &mv_md5_alg, + &mv_sha1_alg, + &mv_sha256_alg, + &mv_ahmac_md5_alg, + &mv_ahmac_sha1_alg, + &mv_ahmac_sha256_alg, +}; + +static const struct mv_cesa_caps orion_caps = { + .nengines = 1, + .cipher_algs = orion_cipher_algs, + .ncipher_algs = ARRAY_SIZE(orion_cipher_algs), + .ahash_algs = orion_ahash_algs, + .nahash_algs = ARRAY_SIZE(orion_ahash_algs), + .has_tdma = false, +}; + +static const struct mv_cesa_caps kirkwood_caps = { + .nengines = 1, + .cipher_algs = orion_cipher_algs, + .ncipher_algs = ARRAY_SIZE(orion_cipher_algs), + .ahash_algs = orion_ahash_algs, + .nahash_algs = ARRAY_SIZE(orion_ahash_algs), + .has_tdma = true, +}; + +static const struct mv_cesa_caps armada_370_caps = { + .nengines = 1, + .cipher_algs = armada_370_cipher_algs, + .ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs), + .ahash_algs = armada_370_ahash_algs, + .nahash_algs = ARRAY_SIZE(armada_370_ahash_algs), + .has_tdma = true, +}; + +static const struct mv_cesa_caps armada_xp_caps = { + .nengines = 2, + .cipher_algs = armada_370_cipher_algs, + .ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs), + .ahash_algs = armada_370_ahash_algs, + .nahash_algs = ARRAY_SIZE(armada_370_ahash_algs), + .has_tdma = true, +}; + +static const struct of_device_id mv_cesa_of_match_table[] = { + { .compatible = "marvell,orion-crypto", .data = &orion_caps }, + { .compatible = "marvell,kirkwood-crypto", .data = &kirkwood_caps }, + { .compatible = "marvell,dove-crypto", .data = &kirkwood_caps }, + { .compatible = "marvell,armada-370-crypto", .data = &armada_370_caps }, + { .compatible = "marvell,armada-xp-crypto", .data = &armada_xp_caps }, + { .compatible = "marvell,armada-375-crypto", .data = &armada_xp_caps }, + { .compatible = "marvell,armada-38x-crypto", .data = &armada_xp_caps }, + {} +}; +MODULE_DEVICE_TABLE(of, mv_cesa_of_match_table); + +static void +mv_cesa_conf_mbus_windows(struct mv_cesa_engine *engine, + const struct mbus_dram_target_info *dram) +{ + void __iomem *iobase = engine->regs; + int i; + + for (i = 0; i < 4; i++) { + writel(0, iobase + CESA_TDMA_WINDOW_CTRL(i)); + writel(0, iobase + CESA_TDMA_WINDOW_BASE(i)); + } + + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + writel(((cs->size - 1) & 0xffff0000) | + (cs->mbus_attr << 8) | + (dram->mbus_dram_target_id << 4) | 1, + iobase + CESA_TDMA_WINDOW_CTRL(i)); + writel(cs->base, iobase + CESA_TDMA_WINDOW_BASE(i)); + } +} + +static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) +{ + struct device *dev = cesa->dev; + struct mv_cesa_dev_dma *dma; + + if (!cesa->caps->has_tdma) + return 0; + + dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->tdma_desc_pool = dmam_pool_create("tdma_desc", dev, + sizeof(struct mv_cesa_tdma_desc), + 16, 0); + if (!dma->tdma_desc_pool) + return -ENOMEM; + + dma->op_pool = dmam_pool_create("cesa_op", dev, + sizeof(struct mv_cesa_op_ctx), 16, 0); + if (!dma->op_pool) + return -ENOMEM; + + dma->cache_pool = dmam_pool_create("cesa_cache", dev, + CESA_MAX_HASH_BLOCK_SIZE, 1, 0); + if (!dma->cache_pool) + return -ENOMEM; + + dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0); + if (!dma->cache_pool) + return -ENOMEM; + + cesa->dma = dma; + + return 0; +} + +static int mv_cesa_get_sram(struct platform_device *pdev, int idx) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + struct mv_cesa_engine *engine = &cesa->engines[idx]; + const char *res_name = "sram"; + struct resource *res; + + engine->pool = of_get_named_gen_pool(cesa->dev->of_node, + "marvell,crypto-srams", + idx); + if (engine->pool) { + engine->sram = gen_pool_dma_alloc(engine->pool, + cesa->sram_size, + &engine->sram_dma); + if (engine->sram) + return 0; + + engine->pool = NULL; + return -ENOMEM; + } + + if (cesa->caps->nengines > 1) { + if (!idx) + res_name = "sram0"; + else + res_name = "sram1"; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + res_name); + if (!res || resource_size(res) < cesa->sram_size) + return -EINVAL; + + engine->sram = devm_ioremap_resource(cesa->dev, res); + if (IS_ERR(engine->sram)) + return PTR_ERR(engine->sram); + + engine->sram_dma = phys_to_dma(cesa->dev, + (phys_addr_t)res->start); + + return 0; +} + +static void mv_cesa_put_sram(struct platform_device *pdev, int idx) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + struct mv_cesa_engine *engine = &cesa->engines[idx]; + + if (!engine->pool) + return; + + gen_pool_free(engine->pool, (unsigned long)engine->sram, + cesa->sram_size); +} + +static int mv_cesa_probe(struct platform_device *pdev) +{ + const struct mv_cesa_caps *caps = &orion_caps; + const struct mbus_dram_target_info *dram; + const struct of_device_id *match; + struct device *dev = &pdev->dev; + struct mv_cesa_dev *cesa; + struct mv_cesa_engine *engines; + struct resource *res; + int irq, ret, i; + u32 sram_size; + + if (cesa_dev) { + dev_err(&pdev->dev, "Only one CESA device authorized\n"); + return -EEXIST; + } + + if (dev->of_node) { + match = of_match_node(mv_cesa_of_match_table, dev->of_node); + if (!match || !match->data) + return -ENOTSUPP; + + caps = match->data; + } + + if ((caps == &orion_caps || caps == &kirkwood_caps) && !allhwsupport) + return -ENOTSUPP; + + cesa = devm_kzalloc(dev, sizeof(*cesa), GFP_KERNEL); + if (!cesa) + return -ENOMEM; + + cesa->caps = caps; + cesa->dev = dev; + + sram_size = CESA_SA_DEFAULT_SRAM_SIZE; + of_property_read_u32(cesa->dev->of_node, "marvell,crypto-sram-size", + &sram_size); + if (sram_size < CESA_SA_MIN_SRAM_SIZE) + sram_size = CESA_SA_MIN_SRAM_SIZE; + + cesa->sram_size = sram_size; + cesa->engines = devm_kzalloc(dev, caps->nengines * sizeof(*engines), + GFP_KERNEL); + if (!cesa->engines) + return -ENOMEM; + + spin_lock_init(&cesa->lock); + crypto_init_queue(&cesa->queue, 50); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); + cesa->regs = devm_ioremap_resource(dev, res); + if (IS_ERR(cesa->regs)) + return -ENOMEM; + + ret = mv_cesa_dev_dma_init(cesa); + if (ret) + return ret; + + dram = mv_mbus_dram_info_nooverlap(); + + platform_set_drvdata(pdev, cesa); + + for (i = 0; i < caps->nengines; i++) { + struct mv_cesa_engine *engine = &cesa->engines[i]; + char res_name[7]; + + engine->id = i; + spin_lock_init(&engine->lock); + + ret = mv_cesa_get_sram(pdev, i); + if (ret) + goto err_cleanup; + + irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = irq; + goto err_cleanup; + } + + /* + * Not all platforms can gate the CESA clocks: do not complain + * if the clock does not exist. + */ + snprintf(res_name, sizeof(res_name), "cesa%d", i); + engine->clk = devm_clk_get(dev, res_name); + if (IS_ERR(engine->clk)) { + engine->clk = devm_clk_get(dev, NULL); + if (IS_ERR(engine->clk)) + engine->clk = NULL; + } + + snprintf(res_name, sizeof(res_name), "cesaz%d", i); + engine->zclk = devm_clk_get(dev, res_name); + if (IS_ERR(engine->zclk)) + engine->zclk = NULL; + + ret = clk_prepare_enable(engine->clk); + if (ret) + goto err_cleanup; + + ret = clk_prepare_enable(engine->zclk); + if (ret) + goto err_cleanup; + + engine->regs = cesa->regs + CESA_ENGINE_OFF(i); + + if (dram && cesa->caps->has_tdma) + mv_cesa_conf_mbus_windows(&cesa->engines[i], dram); + + writel(0, cesa->engines[i].regs + CESA_SA_INT_STATUS); + writel(CESA_SA_CFG_STOP_DIG_ERR, + cesa->engines[i].regs + CESA_SA_CFG); + writel(engine->sram_dma & CESA_SA_SRAM_MSK, + cesa->engines[i].regs + CESA_SA_DESC_P0); + + ret = devm_request_threaded_irq(dev, irq, NULL, mv_cesa_int, + IRQF_ONESHOT, + dev_name(&pdev->dev), + &cesa->engines[i]); + if (ret) + goto err_cleanup; + } + + cesa_dev = cesa; + + ret = mv_cesa_add_algs(cesa); + if (ret) { + cesa_dev = NULL; + goto err_cleanup; + } + + dev_info(dev, "CESA device successfully registered\n"); + + return 0; + +err_cleanup: + for (i = 0; i < caps->nengines; i++) { + clk_disable_unprepare(cesa->engines[i].zclk); + clk_disable_unprepare(cesa->engines[i].clk); + mv_cesa_put_sram(pdev, i); + } + + return ret; +} + +static int mv_cesa_remove(struct platform_device *pdev) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + int i; + + mv_cesa_remove_algs(cesa); + + for (i = 0; i < cesa->caps->nengines; i++) { + clk_disable_unprepare(cesa->engines[i].zclk); + clk_disable_unprepare(cesa->engines[i].clk); + mv_cesa_put_sram(pdev, i); + } + + return 0; +} + +static struct platform_driver marvell_cesa = { + .probe = mv_cesa_probe, + .remove = mv_cesa_remove, + .driver = { + .owner = THIS_MODULE, + .name = "marvell-cesa", + .of_match_table = mv_cesa_of_match_table, + }, +}; +module_platform_driver(marvell_cesa); + +MODULE_ALIAS("platform:mv_crypto"); +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>"); +MODULE_AUTHOR("Arnaud Ebalard <arno@natisbad.org>"); +MODULE_DESCRIPTION("Support for Marvell's cryptographic engine"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h new file mode 100644 index 0000000..b60698b --- /dev/null +++ b/drivers/crypto/marvell/cesa.h @@ -0,0 +1,791 @@ +#ifndef __MARVELL_CESA_H__ +#define __MARVELL_CESA_H__ + +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> + +#include <linux/crypto.h> +#include <linux/dmapool.h> + +#define CESA_ENGINE_OFF(i) (((i) * 0x2000)) + +#define CESA_TDMA_BYTE_CNT 0x800 +#define CESA_TDMA_SRC_ADDR 0x810 +#define CESA_TDMA_DST_ADDR 0x820 +#define CESA_TDMA_NEXT_ADDR 0x830 + +#define CESA_TDMA_CONTROL 0x840 +#define CESA_TDMA_DST_BURST GENMASK(2, 0) +#define CESA_TDMA_DST_BURST_32B 3 +#define CESA_TDMA_DST_BURST_128B 4 +#define CESA_TDMA_OUT_RD_EN BIT(4) +#define CESA_TDMA_SRC_BURST GENMASK(8, 6) +#define CESA_TDMA_SRC_BURST_32B (3 << 6) +#define CESA_TDMA_SRC_BURST_128B (4 << 6) +#define CESA_TDMA_CHAIN BIT(9) +#define CESA_TDMA_BYTE_SWAP BIT(11) +#define CESA_TDMA_NO_BYTE_SWAP BIT(11) +#define CESA_TDMA_EN BIT(12) +#define CESA_TDMA_FETCH_ND BIT(13) +#define CESA_TDMA_ACT BIT(14) + +#define CESA_TDMA_CUR 0x870 +#define CESA_TDMA_ERROR_CAUSE 0x8c8 +#define CESA_TDMA_ERROR_MSK 0x8cc + +#define CESA_TDMA_WINDOW_BASE(x) (((x) * 0x8) + 0xa00) +#define CESA_TDMA_WINDOW_CTRL(x) (((x) * 0x8) + 0xa04) + +#define CESA_IVDIG(x) (0xdd00 + ((x) * 4) + \ + (((x) < 5) ? 0 : 0x14)) + +#define CESA_SA_CMD 0xde00 +#define CESA_SA_CMD_EN_CESA_SA_ACCL0 BIT(0) +#define CESA_SA_CMD_EN_CESA_SA_ACCL1 BIT(1) +#define CESA_SA_CMD_DISABLE_SEC BIT(2) + +#define CESA_SA_DESC_P0 0xde04 + +#define CESA_SA_DESC_P1 0xde14 + +#define CESA_SA_CFG 0xde08 +#define CESA_SA_CFG_STOP_DIG_ERR GENMASK(1, 0) +#define CESA_SA_CFG_DIG_ERR_CONT 0 +#define CESA_SA_CFG_DIG_ERR_SKIP 1 +#define CESA_SA_CFG_DIG_ERR_STOP 3 +#define CESA_SA_CFG_CH0_W_IDMA BIT(7) +#define CESA_SA_CFG_CH1_W_IDMA BIT(8) +#define CESA_SA_CFG_ACT_CH0_IDMA BIT(9) +#define CESA_SA_CFG_ACT_CH1_IDMA BIT(10) +#define CESA_SA_CFG_MULTI_PKT BIT(11) +#define CESA_SA_CFG_PARA_DIS BIT(13) + +#define CESA_SA_ACCEL_STATUS 0xde0c +#define CESA_SA_ST_ACT_0 BIT(0) +#define CESA_SA_ST_ACT_1 BIT(1) + +/* + * CESA_SA_FPGA_INT_STATUS looks like a FPGA leftover and is documented only + * in Errata 4.12. It looks like that it was part of an IRQ-controller in FPGA + * and someone forgot to remove it while switching to the core and moving to + * CESA_SA_INT_STATUS. + */ +#define CESA_SA_FPGA_INT_STATUS 0xdd68 +#define CESA_SA_INT_STATUS 0xde20 +#define CESA_SA_INT_AUTH_DONE BIT(0) +#define CESA_SA_INT_DES_E_DONE BIT(1) +#define CESA_SA_INT_AES_E_DONE BIT(2) +#define CESA_SA_INT_AES_D_DONE BIT(3) +#define CESA_SA_INT_ENC_DONE BIT(4) +#define CESA_SA_INT_ACCEL0_DONE BIT(5) +#define CESA_SA_INT_ACCEL1_DONE BIT(6) +#define CESA_SA_INT_ACC0_IDMA_DONE BIT(7) +#define CESA_SA_INT_ACC1_IDMA_DONE BIT(8) +#define CESA_SA_INT_IDMA_DONE BIT(9) +#define CESA_SA_INT_IDMA_OWN_ERR BIT(10) + +#define CESA_SA_INT_MSK 0xde24 + +#define CESA_SA_DESC_CFG_OP_MAC_ONLY 0 +#define CESA_SA_DESC_CFG_OP_CRYPT_ONLY 1 +#define CESA_SA_DESC_CFG_OP_MAC_CRYPT 2 +#define CESA_SA_DESC_CFG_OP_CRYPT_MAC 3 +#define CESA_SA_DESC_CFG_OP_MSK GENMASK(1, 0) +#define CESA_SA_DESC_CFG_MACM_SHA256 (1 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_SHA256 (3 << 4) +#define CESA_SA_DESC_CFG_MACM_MD5 (4 << 4) +#define CESA_SA_DESC_CFG_MACM_SHA1 (5 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_MD5 (6 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_SHA1 (7 << 4) +#define CESA_SA_DESC_CFG_MACM_MSK GENMASK(6, 4) +#define CESA_SA_DESC_CFG_CRYPTM_DES (1 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_3DES (2 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_AES (3 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_MSK GENMASK(9, 8) +#define CESA_SA_DESC_CFG_DIR_ENC (0 << 12) +#define CESA_SA_DESC_CFG_DIR_DEC (1 << 12) +#define CESA_SA_DESC_CFG_CRYPTCM_ECB (0 << 16) +#define CESA_SA_DESC_CFG_CRYPTCM_CBC (1 << 16) +#define CESA_SA_DESC_CFG_CRYPTCM_MSK BIT(16) +#define CESA_SA_DESC_CFG_3DES_EEE (0 << 20) +#define CESA_SA_DESC_CFG_3DES_EDE (1 << 20) +#define CESA_SA_DESC_CFG_AES_LEN_128 (0 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_192 (1 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_256 (2 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_MSK GENMASK(25, 24) +#define CESA_SA_DESC_CFG_NOT_FRAG (0 << 30) +#define CESA_SA_DESC_CFG_FIRST_FRAG (1 << 30) +#define CESA_SA_DESC_CFG_LAST_FRAG (2 << 30) +#define CESA_SA_DESC_CFG_MID_FRAG (3 << 30) +#define CESA_SA_DESC_CFG_FRAG_MSK GENMASK(31, 30) + +/* + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | CRYPT KEY | 8 * 4 + * |-----------| 0x40 + * | IV IN | 4 * 4 + * |-----------| 0x40 (inplace) + * | IV BUF | 4 * 4 + * |-----------| 0x80 + * | DATA IN | 16 * x (max ->max_req_size) + * |-----------| 0x80 (inplace operation) + * | DATA OUT | 16 * x (max ->max_req_size) + * \-----------/ SRAM size + */ + +/* + * Hashing memory map: + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | Inner IV | 8 * 4 + * |-----------| 0x40 + * | Outer IV | 8 * 4 + * |-----------| 0x60 + * | Output BUF| 8 * 4 + * |-----------| 0x80 + * | DATA IN | 64 * x (max ->max_req_size) + * \-----------/ SRAM size + */ + +#define CESA_SA_CFG_SRAM_OFFSET 0x00 +#define CESA_SA_DATA_SRAM_OFFSET 0x80 + +#define CESA_SA_CRYPT_KEY_SRAM_OFFSET 0x20 +#define CESA_SA_CRYPT_IV_SRAM_OFFSET 0x40 + +#define CESA_SA_MAC_IIV_SRAM_OFFSET 0x20 +#define CESA_SA_MAC_OIV_SRAM_OFFSET 0x40 +#define CESA_SA_MAC_DIG_SRAM_OFFSET 0x60 + +#define CESA_SA_DESC_CRYPT_DATA(offset) \ + cpu_to_le32((CESA_SA_DATA_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_DATA_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_DESC_CRYPT_IV(offset) \ + cpu_to_le32((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_DESC_CRYPT_KEY(offset) \ + cpu_to_le32(CESA_SA_CRYPT_KEY_SRAM_OFFSET + (offset)) + +#define CESA_SA_DESC_MAC_DATA(offset) \ + cpu_to_le32(CESA_SA_DATA_SRAM_OFFSET + (offset)) +#define CESA_SA_DESC_MAC_DATA_MSK GENMASK(15, 0) + +#define CESA_SA_DESC_MAC_TOTAL_LEN(total_len) cpu_to_le32((total_len) << 16) +#define CESA_SA_DESC_MAC_TOTAL_LEN_MSK GENMASK(31, 16) + +#define CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX 0xffff + +#define CESA_SA_DESC_MAC_DIGEST(offset) \ + cpu_to_le32(CESA_SA_MAC_DIG_SRAM_OFFSET + (offset)) +#define CESA_SA_DESC_MAC_DIGEST_MSK GENMASK(15, 0) + +#define CESA_SA_DESC_MAC_FRAG_LEN(frag_len) cpu_to_le32((frag_len) << 16) +#define CESA_SA_DESC_MAC_FRAG_LEN_MSK GENMASK(31, 16) + +#define CESA_SA_DESC_MAC_IV(offset) \ + cpu_to_le32((CESA_SA_MAC_IIV_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_MAC_OIV_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_SRAM_SIZE 2048 +#define CESA_SA_SRAM_PAYLOAD_SIZE (cesa_dev->sram_size - \ + CESA_SA_DATA_SRAM_OFFSET) + +#define CESA_SA_DEFAULT_SRAM_SIZE 2048 +#define CESA_SA_MIN_SRAM_SIZE 1024 + +#define CESA_SA_SRAM_MSK (2048 - 1) + +#define CESA_MAX_HASH_BLOCK_SIZE 64 +#define CESA_HASH_BLOCK_SIZE_MSK (CESA_MAX_HASH_BLOCK_SIZE - 1) + +/** + * struct mv_cesa_sec_accel_desc - security accelerator descriptor + * @config: engine config + * @enc_p: input and output data pointers for a cipher operation + * @enc_len: cipher operation length + * @enc_key_p: cipher key pointer + * @enc_iv: cipher IV pointers + * @mac_src_p: input pointer and total hash length + * @mac_digest: digest pointer and hash operation length + * @mac_iv: hmac IV pointers + * + * Structure passed to the CESA engine to describe the crypto operation + * to be executed. + */ +struct mv_cesa_sec_accel_desc { + u32 config; + u32 enc_p; + u32 enc_len; + u32 enc_key_p; + u32 enc_iv; + u32 mac_src_p; + u32 mac_digest; + u32 mac_iv; +}; + +/** + * struct mv_cesa_blkcipher_op_ctx - cipher operation context + * @key: cipher key + * @iv: cipher IV + * + * Context associated to a cipher operation. + */ +struct mv_cesa_blkcipher_op_ctx { + u32 key[8]; + u32 iv[4]; +}; + +/** + * struct mv_cesa_hash_op_ctx - hash or hmac operation context + * @key: cipher key + * @iv: cipher IV + * + * Context associated to an hash or hmac operation. + */ +struct mv_cesa_hash_op_ctx { + u32 iv[16]; + u32 hash[8]; +}; + +/** + * struct mv_cesa_op_ctx - crypto operation context + * @desc: CESA descriptor + * @ctx: context associated to the crypto operation + * + * Context associated to a crypto operation. + */ +struct mv_cesa_op_ctx { + struct mv_cesa_sec_accel_desc desc; + union { + struct mv_cesa_blkcipher_op_ctx blkcipher; + struct mv_cesa_hash_op_ctx hash; + } ctx; +}; + +/* TDMA descriptor flags */ +#define CESA_TDMA_DST_IN_SRAM BIT(31) +#define CESA_TDMA_SRC_IN_SRAM BIT(30) +#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_DUMMY 0 +#define CESA_TDMA_DATA 1 +#define CESA_TDMA_OP 2 + +/** + * struct mv_cesa_tdma_desc - TDMA descriptor + * @byte_cnt: number of bytes to transfer + * @src: DMA address of the source + * @dst: DMA address of the destination + * @next_dma: DMA address of the next TDMA descriptor + * @cur_dma: DMA address of this TDMA descriptor + * @next: pointer to the next TDMA descriptor + * @op: CESA operation attached to this TDMA descriptor + * @data: raw data attached to this TDMA descriptor + * @flags: flags describing the TDMA transfer. See the + * "TDMA descriptor flags" section above + * + * TDMA descriptor used to create a transfer chain describing a crypto + * operation. + */ +struct mv_cesa_tdma_desc { + u32 byte_cnt; + u32 src; + u32 dst; + u32 next_dma; + u32 cur_dma; + struct mv_cesa_tdma_desc *next; + union { + struct mv_cesa_op_ctx *op; + void *data; + }; + u32 flags; +}; + +/** + * struct mv_cesa_sg_dma_iter - scatter-gather iterator + * @dir: transfer direction + * @sg: scatter list + * @offset: current position in the scatter list + * @op_offset: current position in the crypto operation + * + * Iterator used to iterate over a scatterlist while creating a TDMA chain for + * a crypto operation. + */ +struct mv_cesa_sg_dma_iter { + enum dma_data_direction dir; + struct scatterlist *sg; + unsigned int offset; + unsigned int op_offset; +}; + +/** + * struct mv_cesa_dma_iter - crypto operation iterator + * @len: the crypto operation length + * @offset: current position in the crypto operation + * @op_len: sub-operation length (the crypto engine can only act on 2kb + * chunks) + * + * Iterator used to create a TDMA chain for a given crypto operation. + */ +struct mv_cesa_dma_iter { + unsigned int len; + unsigned int offset; + unsigned int op_len; +}; + +/** + * struct mv_cesa_tdma_chain - TDMA chain + * @first: first entry in the TDMA chain + * @last: last entry in the TDMA chain + * + * Stores a TDMA chain for a specific crypto operation. + */ +struct mv_cesa_tdma_chain { + struct mv_cesa_tdma_desc *first; + struct mv_cesa_tdma_desc *last; +}; + +struct mv_cesa_engine; + +/** + * struct mv_cesa_caps - CESA device capabilities + * @engines: number of engines + * @has_tdma: whether this device has a TDMA block + * @cipher_algs: supported cipher algorithms + * @ncipher_algs: number of supported cipher algorithms + * @ahash_algs: supported hash algorithms + * @nahash_algs: number of supported hash algorithms + * + * Structure used to describe CESA device capabilities. + */ +struct mv_cesa_caps { + int nengines; + bool has_tdma; + struct crypto_alg **cipher_algs; + int ncipher_algs; + struct ahash_alg **ahash_algs; + int nahash_algs; +}; + +/** + * struct mv_cesa_dev_dma - DMA pools + * @tdma_desc_pool: TDMA desc pool + * @op_pool: crypto operation pool + * @cache_pool: data cache pool (used by hash implementation when the + * hash request is smaller than the hash block size) + * @padding_pool: padding pool (used by hash implementation when hardware + * padding cannot be used) + * + * Structure containing the different DMA pools used by this driver. + */ +struct mv_cesa_dev_dma { + struct dma_pool *tdma_desc_pool; + struct dma_pool *op_pool; + struct dma_pool *cache_pool; + struct dma_pool *padding_pool; +}; + +/** + * struct mv_cesa_dev - CESA device + * @caps: device capabilities + * @regs: device registers + * @sram_size: usable SRAM size + * @lock: device lock + * @queue: crypto request queue + * @engines: array of engines + * @dma: dma pools + * + * Structure storing CESA device information. + */ +struct mv_cesa_dev { + const struct mv_cesa_caps *caps; + void __iomem *regs; + struct device *dev; + unsigned int sram_size; + spinlock_t lock; + struct crypto_queue queue; + struct mv_cesa_engine *engines; + struct mv_cesa_dev_dma *dma; +}; + +/** + * struct mv_cesa_engine - CESA engine + * @id: engine id + * @regs: engine registers + * @sram: SRAM memory region + * @sram_dma: DMA address of the SRAM memory region + * @lock: engine lock + * @req: current crypto request + * @clk: engine clk + * @zclk: engine zclk + * @max_req_len: maximum chunk length (useful to create the TDMA chain) + * @int_mask: interrupt mask cache + * @pool: memory pool pointing to the memory region reserved in + * SRAM + * + * Structure storing CESA engine information. + */ +struct mv_cesa_engine { + int id; + void __iomem *regs; + void __iomem *sram; + dma_addr_t sram_dma; + spinlock_t lock; + struct crypto_async_request *req; + struct clk *clk; + struct clk *zclk; + size_t max_req_len; + u32 int_mask; + struct gen_pool *pool; +}; + +/** + * struct mv_cesa_req_ops - CESA request operations + * @prepare: prepare a request to be executed on the specified engine + * @process: process a request chunk result (should return 0 if the + * operation, -EINPROGRESS if it needs more steps or an error + * code) + * @step: launch the crypto operation on the next chunk + * @cleanup: cleanup the crypto request (release associated data) + */ +struct mv_cesa_req_ops { + void (*prepare)(struct crypto_async_request *req, + struct mv_cesa_engine *engine); + int (*process)(struct crypto_async_request *req, u32 status); + void (*step)(struct crypto_async_request *req); + void (*cleanup)(struct crypto_async_request *req); +}; + +/** + * struct mv_cesa_ctx - CESA operation context + * @ops: crypto operations + * + * Base context structure inherited by operation specific ones. + */ +struct mv_cesa_ctx { + const struct mv_cesa_req_ops *ops; +}; + +/** + * struct mv_cesa_hash_ctx - CESA hash operation context + * @base: base context structure + * + * Hash context structure. + */ +struct mv_cesa_hash_ctx { + struct mv_cesa_ctx base; +}; + +/** + * struct mv_cesa_hash_ctx - CESA hmac operation context + * @base: base context structure + * @iv: initialization vectors + * + * HMAC context structure. + */ +struct mv_cesa_hmac_ctx { + struct mv_cesa_ctx base; + u32 iv[16]; +}; + +/** + * enum mv_cesa_req_type - request type definitions + * @CESA_STD_REQ: standard request + * @CESA_DMA_REQ: DMA request + */ +enum mv_cesa_req_type { + CESA_STD_REQ, + CESA_DMA_REQ, +}; + +/** + * struct mv_cesa_req - CESA request + * @type: request type + * @engine: engine associated with this request + */ +struct mv_cesa_req { + enum mv_cesa_req_type type; + struct mv_cesa_engine *engine; +}; + +/** + * struct mv_cesa_tdma_req - CESA TDMA request + * @base: base information + * @chain: TDMA chain + */ +struct mv_cesa_tdma_req { + struct mv_cesa_req base; + struct mv_cesa_tdma_chain chain; +}; + +/** + * struct mv_cesa_sg_std_iter - CESA scatter-gather iterator for standard + * requests + * @iter: sg mapping iterator + * @offset: current offset in the SG entry mapped in memory + */ +struct mv_cesa_sg_std_iter { + struct sg_mapping_iter iter; + unsigned int offset; +}; + +/** + * struct mv_cesa_ablkcipher_std_req - cipher standard request + * @base: base information + * @op: operation context + * @offset: current operation offset + * @size: size of the crypto operation + */ +struct mv_cesa_ablkcipher_std_req { + struct mv_cesa_req base; + struct mv_cesa_op_ctx op; + unsigned int offset; + unsigned int size; + bool skip_ctx; +}; + +/** + * struct mv_cesa_ablkcipher_req - cipher request + * @req: type specific request information + * @src_nents: number of entries in the src sg list + * @dst_nents: number of entries in the dest sg list + */ +struct mv_cesa_ablkcipher_req { + union { + struct mv_cesa_req base; + struct mv_cesa_tdma_req dma; + struct mv_cesa_ablkcipher_std_req std; + } req; + int src_nents; + int dst_nents; +}; + +/** + * struct mv_cesa_ahash_std_req - standard hash request + * @base: base information + * @offset: current operation offset + */ +struct mv_cesa_ahash_std_req { + struct mv_cesa_req base; + unsigned int offset; +}; + +/** + * struct mv_cesa_ahash_dma_req - DMA hash request + * @base: base information + * @padding: padding buffer + * @padding_dma: DMA address of the padding buffer + * @cache_dma: DMA address of the cache buffer + */ +struct mv_cesa_ahash_dma_req { + struct mv_cesa_tdma_req base; + u8 *padding; + dma_addr_t padding_dma; + dma_addr_t cache_dma; +}; + +/** + * struct mv_cesa_ahash_req - hash request + * @req: type specific request information + * @cache: cache buffer + * @cache_ptr: write pointer in the cache buffer + * @len: hash total length + * @src_nents: number of entries in the scatterlist + * @last_req: define whether the current operation is the last one + * or not + * @state: hash state + */ +struct mv_cesa_ahash_req { + union { + struct mv_cesa_req base; + struct mv_cesa_ahash_dma_req dma; + struct mv_cesa_ahash_std_req std; + } req; + struct mv_cesa_op_ctx op_tmpl; + u8 *cache; + unsigned int cache_ptr; + u64 len; + int src_nents; + bool last_req; + __be32 state[8]; +}; + +/* CESA functions */ + +extern struct mv_cesa_dev *cesa_dev; + +static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, + u32 cfg, u32 mask) +{ + op->desc.config &= cpu_to_le32(~mask); + op->desc.config |= cpu_to_le32(cfg); +} + +static inline u32 mv_cesa_get_op_cfg(struct mv_cesa_op_ctx *op) +{ + return le32_to_cpu(op->desc.config); +} + +static inline void mv_cesa_set_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg) +{ + op->desc.config = cpu_to_le32(cfg); +} + +static inline void mv_cesa_adjust_op(struct mv_cesa_engine *engine, + struct mv_cesa_op_ctx *op) +{ + u32 offset = engine->sram_dma & CESA_SA_SRAM_MSK; + + op->desc.enc_p = CESA_SA_DESC_CRYPT_DATA(offset); + op->desc.enc_key_p = CESA_SA_DESC_CRYPT_KEY(offset); + op->desc.enc_iv = CESA_SA_DESC_CRYPT_IV(offset); + op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_DATA_MSK; + op->desc.mac_src_p |= CESA_SA_DESC_MAC_DATA(offset); + op->desc.mac_digest &= ~CESA_SA_DESC_MAC_DIGEST_MSK; + op->desc.mac_digest |= CESA_SA_DESC_MAC_DIGEST(offset); + op->desc.mac_iv = CESA_SA_DESC_MAC_IV(offset); +} + +static inline void mv_cesa_set_crypt_op_len(struct mv_cesa_op_ctx *op, int len) +{ + op->desc.enc_len = cpu_to_le32(len); +} + +static inline void mv_cesa_set_mac_op_total_len(struct mv_cesa_op_ctx *op, + int len) +{ + op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_TOTAL_LEN_MSK; + op->desc.mac_src_p |= CESA_SA_DESC_MAC_TOTAL_LEN(len); +} + +static inline void mv_cesa_set_mac_op_frag_len(struct mv_cesa_op_ctx *op, + int len) +{ + op->desc.mac_digest &= ~CESA_SA_DESC_MAC_FRAG_LEN_MSK; + op->desc.mac_digest |= CESA_SA_DESC_MAC_FRAG_LEN(len); +} + +static inline void mv_cesa_set_int_mask(struct mv_cesa_engine *engine, + u32 int_mask) +{ + if (int_mask == engine->int_mask) + return; + + writel(int_mask, engine->regs + CESA_SA_INT_MSK); + engine->int_mask = int_mask; +} + +static inline u32 mv_cesa_get_int_mask(struct mv_cesa_engine *engine) +{ + return engine->int_mask; +} + +int mv_cesa_queue_req(struct crypto_async_request *req); + +/* TDMA functions */ + +static inline void mv_cesa_req_dma_iter_init(struct mv_cesa_dma_iter *iter, + unsigned int len) +{ + iter->len = len; + iter->op_len = min(len, CESA_SA_SRAM_PAYLOAD_SIZE); + iter->offset = 0; +} + +static inline void mv_cesa_sg_dma_iter_init(struct mv_cesa_sg_dma_iter *iter, + struct scatterlist *sg, + enum dma_data_direction dir) +{ + iter->op_offset = 0; + iter->offset = 0; + iter->sg = sg; + iter->dir = dir; +} + +static inline unsigned int +mv_cesa_req_dma_iter_transfer_len(struct mv_cesa_dma_iter *iter, + struct mv_cesa_sg_dma_iter *sgiter) +{ + return min(iter->op_len - sgiter->op_offset, + sg_dma_len(sgiter->sg) - sgiter->offset); +} + +bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *chain, + struct mv_cesa_sg_dma_iter *sgiter, + unsigned int len); + +static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) +{ + iter->offset += iter->op_len; + iter->op_len = min(iter->len - iter->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + return iter->op_len; +} + +void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); + +static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, + u32 status) +{ + if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) + return -EINPROGRESS; + + if (status & CESA_SA_INT_IDMA_OWN_ERR) + return -EINVAL; + + return 0; +} + +void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, + struct mv_cesa_engine *engine); + +void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); + +static inline void +mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) +{ + memset(chain, 0, sizeof(*chain)); +} + +struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, + const struct mv_cesa_op_ctx *op_templ, + bool skip_ctx, + gfp_t flags); + +int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain, + dma_addr_t dst, dma_addr_t src, u32 size, + u32 flags, gfp_t gfp_flags); + +int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, + u32 flags); + +int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags); + +int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_dma_iter *dma_iter, + struct mv_cesa_sg_dma_iter *sgiter, + gfp_t gfp_flags); + +/* Algorithm definitions */ + +extern struct ahash_alg mv_md5_alg; +extern struct ahash_alg mv_sha1_alg; +extern struct ahash_alg mv_sha256_alg; +extern struct ahash_alg mv_ahmac_md5_alg; +extern struct ahash_alg mv_ahmac_sha1_alg; +extern struct ahash_alg mv_ahmac_sha256_alg; + +extern struct crypto_alg mv_cesa_ecb_des_alg; +extern struct crypto_alg mv_cesa_cbc_des_alg; +extern struct crypto_alg mv_cesa_ecb_des3_ede_alg; +extern struct crypto_alg mv_cesa_cbc_des3_ede_alg; +extern struct crypto_alg mv_cesa_ecb_aes_alg; +extern struct crypto_alg mv_cesa_cbc_aes_alg; + +#endif /* __MARVELL_CESA_H__ */ diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c new file mode 100644 index 0000000..0745cf3 --- /dev/null +++ b/drivers/crypto/marvell/cipher.c @@ -0,0 +1,797 @@ +/* + * Cipher algorithms supported by the CESA: DES, 3DES and AES. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/des.h> + +#include "cesa.h" + +struct mv_cesa_des_ctx { + struct mv_cesa_ctx base; + u8 key[DES_KEY_SIZE]; +}; + +struct mv_cesa_des3_ctx { + struct mv_cesa_ctx base; + u8 key[DES3_EDE_KEY_SIZE]; +}; + +struct mv_cesa_aes_ctx { + struct mv_cesa_ctx base; + struct crypto_aes_ctx aes; +}; + +struct mv_cesa_ablkcipher_dma_iter { + struct mv_cesa_dma_iter base; + struct mv_cesa_sg_dma_iter src; + struct mv_cesa_sg_dma_iter dst; +}; + +static inline void +mv_cesa_ablkcipher_req_iter_init(struct mv_cesa_ablkcipher_dma_iter *iter, + struct ablkcipher_request *req) +{ + mv_cesa_req_dma_iter_init(&iter->base, req->nbytes); + mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE); + mv_cesa_sg_dma_iter_init(&iter->dst, req->dst, DMA_FROM_DEVICE); +} + +static inline bool +mv_cesa_ablkcipher_req_iter_next_op(struct mv_cesa_ablkcipher_dma_iter *iter) +{ + iter->src.op_offset = 0; + iter->dst.op_offset = 0; + + return mv_cesa_req_dma_iter_next_op(&iter->base); +} + +static inline void +mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + + if (req->dst != req->src) { + dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + } else { + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_BIDIRECTIONAL); + } + mv_cesa_dma_cleanup(&creq->req.dma); +} + +static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ablkcipher_dma_cleanup(req); +} + +static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + size_t len = min_t(size_t, req->nbytes - sreq->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + len = sg_pcopy_to_buffer(req->src, creq->src_nents, + engine->sram + CESA_SA_DATA_SRAM_OFFSET, + len, sreq->offset); + + sreq->size = len; + mv_cesa_set_crypt_op_len(&sreq->op, len); + + /* FIXME: only update enc_len field */ + if (!sreq->skip_ctx) { + memcpy(engine->sram, &sreq->op, sizeof(sreq->op)); + sreq->skip_ctx = true; + } else { + memcpy(engine->sram, &sreq->op, sizeof(sreq->op.desc)); + } + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); + writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, + u32 status) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + size_t len; + + len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, + engine->sram + CESA_SA_DATA_SRAM_OFFSET, + sreq->size, sreq->offset); + + sreq->offset += len; + if (sreq->offset < req->nbytes) + return -EINPROGRESS; + + return 0; +} + +static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, + u32 status) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + int ret; + + if (creq->req.base.type == CESA_DMA_REQ) + ret = mv_cesa_dma_process(&creq->req.dma, status); + else + ret = mv_cesa_ablkcipher_std_process(ablkreq, status); + + if (ret) + return ret; + + memcpy(ablkreq->info, engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + + return 0; +} + +static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->req.dma); + else + mv_cesa_ablkcipher_std_step(ablkreq); +} + +static inline void +mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_tdma_req *dreq = &creq->req.dma; + + mv_cesa_dma_prepare(dreq, dreq->base.engine); +} + +static inline void +mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + + sreq->size = 0; + sreq->offset = 0; + mv_cesa_adjust_op(engine, &sreq->op); + memcpy(engine->sram, &sreq->op, sizeof(sreq->op)); +} + +static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, + struct mv_cesa_engine *engine) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + + creq->req.base.engine = engine; + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ablkcipher_dma_prepare(ablkreq); + else + mv_cesa_ablkcipher_std_prepare(ablkreq); +} + +static inline void +mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + + mv_cesa_ablkcipher_cleanup(ablkreq); +} + +static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { + .step = mv_cesa_ablkcipher_step, + .process = mv_cesa_ablkcipher_process, + .prepare = mv_cesa_ablkcipher_prepare, + .cleanup = mv_cesa_ablkcipher_req_cleanup, +}; + +static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->base.ops = &mv_cesa_ablkcipher_req_ops; + + tfm->crt_ablkcipher.reqsize = sizeof(struct mv_cesa_ablkcipher_req); + + return 0; +} + +static int mv_cesa_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int len) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int remaining; + int offset; + int ret; + int i; + + ret = crypto_aes_expand_key(&ctx->aes, key, len); + if (ret) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return ret; + } + + remaining = (ctx->aes.key_length - 16) / 4; + offset = ctx->aes.key_length + 24 - remaining; + for (i = 0; i < remaining; i++) + ctx->aes.key_dec[4 + i] = + cpu_to_le32(ctx->aes.key_enc[offset + i]); + + return 0; +} + +static int mv_cesa_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key, + unsigned int len) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(tfm); + u32 tmp[DES_EXPKEY_WORDS]; + int ret; + + if (len != DES_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + ret = des_ekey(tmp, key); + if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; + return -EINVAL; + } + + memcpy(ctx->key, key, DES_KEY_SIZE); + + return 0; +} + +static int mv_cesa_des3_ede_setkey(struct crypto_ablkcipher *cipher, + const u8 *key, unsigned int len) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(tfm); + + if (len != DES3_EDE_KEY_SIZE) { + crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, DES3_EDE_KEY_SIZE); + + return 0; +} + +static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, + const struct mv_cesa_op_ctx *op_templ) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_ablkcipher_dma_iter iter; + struct mv_cesa_tdma_chain chain; + bool skip_ctx = false; + int ret; + + dreq->base.type = CESA_DMA_REQ; + dreq->chain.first = NULL; + dreq->chain.last = NULL; + + if (req->src != req->dst) { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + if (!ret) + return -ENOMEM; + + ret = dma_map_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + if (!ret) { + ret = -ENOMEM; + goto err_unmap_src; + } + } else { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_BIDIRECTIONAL); + if (!ret) + return -ENOMEM; + } + + mv_cesa_tdma_desc_iter_init(&chain); + mv_cesa_ablkcipher_req_iter_init(&iter, req); + + do { + struct mv_cesa_op_ctx *op; + + op = mv_cesa_dma_add_op(&chain, op_templ, skip_ctx, flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + skip_ctx = true; + + mv_cesa_set_crypt_op_len(op, iter.base.op_len); + + /* Add input transfers */ + ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + &iter.src, flags); + if (ret) + goto err_free_tdma; + + /* Add dummy desc to launch the crypto operation */ + ret = mv_cesa_dma_add_dummy_launch(&chain, flags); + if (ret) + goto err_free_tdma; + + /* Add output transfers */ + ret = mv_cesa_dma_add_op_transfers(&chain, &iter.base, + &iter.dst, flags); + if (ret) + goto err_free_tdma; + + } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); + + dreq->chain = chain; + + return 0; + +err_free_tdma: + mv_cesa_dma_cleanup(dreq); + if (req->dst != req->src) + dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + +err_unmap_src: + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + req->dst != req->src ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); + + return ret; +} + +static inline int +mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, + const struct mv_cesa_op_ctx *op_templ) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + + sreq->base.type = CESA_STD_REQ; + sreq->op = *op_templ; + sreq->skip_ctx = false; + + return 0; +} + +static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + unsigned int blksize = crypto_ablkcipher_blocksize(tfm); + int ret; + + if (!IS_ALIGNED(req->nbytes, blksize)) + return -EINVAL; + + creq->src_nents = sg_nents_for_len(req->src, req->nbytes); + creq->dst_nents = sg_nents_for_len(req->dst, req->nbytes); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, + CESA_SA_DESC_CFG_OP_MSK); + + /* TODO: add a threshold for DMA usage */ + if (cesa_dev->caps->has_tdma) + ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); + else + ret = mv_cesa_ablkcipher_std_req_init(req, tmpl); + + return ret; +} + +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + int ret; + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + ret = mv_cesa_ablkcipher_req_init(req, tmpl); + if (ret) + return ret; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) + mv_cesa_ablkcipher_cleanup(req); + + return ret; +} + +static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_des_op(req, &tmpl); +} + +static int mv_cesa_ecb_des_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_des_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_ecb_des_alg = { + .cra_name = "ecb(des)", + .cra_driver_name = "mv-ecb-des", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = mv_cesa_des_setkey, + .encrypt = mv_cesa_ecb_des_encrypt, + .decrypt = mv_cesa_ecb_des_decrypt, + }, + }, +}; + +static int mv_cesa_cbc_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC, + CESA_SA_DESC_CFG_CRYPTCM_MSK); + + memcpy(tmpl->ctx.blkcipher.iv, req->info, DES_BLOCK_SIZE); + + return mv_cesa_des_op(req, tmpl); +} + +static int mv_cesa_cbc_des_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_des_op(req, &tmpl); +} + +static int mv_cesa_cbc_des_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_des_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_cbc_des_alg = { + .cra_name = "cbc(des)", + .cra_driver_name = "mv-cbc-des", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = mv_cesa_des_setkey, + .encrypt = mv_cesa_cbc_des_encrypt, + .decrypt = mv_cesa_cbc_des_decrypt, + }, + }, +}; + +static int mv_cesa_des3_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + int ret; + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); + + ret = mv_cesa_ablkcipher_req_init(req, tmpl); + if (ret) + return ret; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) + mv_cesa_ablkcipher_cleanup(req); + + return ret; +} + +static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_des3_op(req, &tmpl); +} + +static int mv_cesa_ecb_des3_ede_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_des3_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_ecb_des3_ede_alg = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "mv-ecb-des3-ede", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = mv_cesa_des3_ede_setkey, + .encrypt = mv_cesa_ecb_des3_ede_encrypt, + .decrypt = mv_cesa_ecb_des3_ede_decrypt, + }, + }, +}; + +static int mv_cesa_cbc_des3_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + memcpy(tmpl->ctx.blkcipher.iv, req->info, DES3_EDE_BLOCK_SIZE); + + return mv_cesa_des3_op(req, tmpl); +} + +static int mv_cesa_cbc_des3_ede_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_CBC | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_des3_op(req, &tmpl); +} + +static int mv_cesa_cbc_des3_ede_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_CBC | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_des3_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_cbc_des3_ede_alg = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "mv-cbc-des3-ede", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = mv_cesa_des3_ede_setkey, + .encrypt = mv_cesa_cbc_des3_ede_encrypt, + .decrypt = mv_cesa_cbc_des3_ede_decrypt, + }, + }, +}; + +static int mv_cesa_aes_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + int ret, i; + u32 *key; + u32 cfg; + + cfg = CESA_SA_DESC_CFG_CRYPTM_AES; + + if (mv_cesa_get_op_cfg(tmpl) & CESA_SA_DESC_CFG_DIR_DEC) + key = ctx->aes.key_dec; + else + key = ctx->aes.key_enc; + + for (i = 0; i < ctx->aes.key_length / sizeof(u32); i++) + tmpl->ctx.blkcipher.key[i] = cpu_to_le32(key[i]); + + if (ctx->aes.key_length == 24) + cfg |= CESA_SA_DESC_CFG_AES_LEN_192; + else if (ctx->aes.key_length == 32) + cfg |= CESA_SA_DESC_CFG_AES_LEN_256; + + mv_cesa_update_op_cfg(tmpl, cfg, + CESA_SA_DESC_CFG_CRYPTM_MSK | + CESA_SA_DESC_CFG_AES_LEN_MSK); + + ret = mv_cesa_ablkcipher_req_init(req, tmpl); + if (ret) + return ret; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) + mv_cesa_ablkcipher_cleanup(req); + + return ret; +} + +static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_aes_op(req, &tmpl); +} + +static int mv_cesa_ecb_aes_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_aes_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_ecb_aes_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "mv-ecb-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = mv_cesa_aes_setkey, + .encrypt = mv_cesa_ecb_aes_encrypt, + .decrypt = mv_cesa_ecb_aes_decrypt, + }, + }, +}; + +static int mv_cesa_cbc_aes_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC, + CESA_SA_DESC_CFG_CRYPTCM_MSK); + memcpy(tmpl->ctx.blkcipher.iv, req->info, AES_BLOCK_SIZE); + + return mv_cesa_aes_op(req, tmpl); +} + +static int mv_cesa_cbc_aes_encrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_aes_op(req, &tmpl); +} + +static int mv_cesa_cbc_aes_decrypt(struct ablkcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_aes_op(req, &tmpl); +} + +struct crypto_alg mv_cesa_cbc_aes_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "mv-cbc-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_ablkcipher_cra_init, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = mv_cesa_aes_setkey, + .encrypt = mv_cesa_cbc_aes_encrypt, + .decrypt = mv_cesa_cbc_aes_decrypt, + }, + }, +}; diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c new file mode 100644 index 0000000..ae9272e --- /dev/null +++ b/drivers/crypto/marvell/hash.c @@ -0,0 +1,1441 @@ +/* + * Hash algorithms supported by the CESA: MD5, SHA1 and SHA256. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <crypto/md5.h> +#include <crypto/sha.h> + +#include "cesa.h" + +struct mv_cesa_ahash_dma_iter { + struct mv_cesa_dma_iter base; + struct mv_cesa_sg_dma_iter src; +}; + +static inline void +mv_cesa_ahash_req_iter_init(struct mv_cesa_ahash_dma_iter *iter, + struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int len = req->nbytes; + + if (!creq->last_req) + len = (len + creq->cache_ptr) & ~CESA_HASH_BLOCK_SIZE_MSK; + + mv_cesa_req_dma_iter_init(&iter->base, len); + mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE); + iter->src.op_offset = creq->cache_ptr; +} + +static inline bool +mv_cesa_ahash_req_iter_next_op(struct mv_cesa_ahash_dma_iter *iter) +{ + iter->src.op_offset = 0; + + return mv_cesa_req_dma_iter_next_op(&iter->base); +} + +static inline int mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_req *creq, + gfp_t flags) +{ + struct mv_cesa_ahash_dma_req *dreq = &creq->req.dma; + + creq->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags, + &dreq->cache_dma); + if (!creq->cache) + return -ENOMEM; + + return 0; +} + +static inline int mv_cesa_ahash_std_alloc_cache(struct mv_cesa_ahash_req *creq, + gfp_t flags) +{ + creq->cache = kzalloc(CESA_MAX_HASH_BLOCK_SIZE, flags); + if (!creq->cache) + return -ENOMEM; + + return 0; +} + +static int mv_cesa_ahash_alloc_cache(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + int ret; + + if (creq->cache) + return 0; + + if (creq->req.base.type == CESA_DMA_REQ) + ret = mv_cesa_ahash_dma_alloc_cache(creq, flags); + else + ret = mv_cesa_ahash_std_alloc_cache(creq, flags); + + return ret; +} + +static inline void mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_req *creq) +{ + dma_pool_free(cesa_dev->dma->cache_pool, creq->cache, + creq->req.dma.cache_dma); +} + +static inline void mv_cesa_ahash_std_free_cache(struct mv_cesa_ahash_req *creq) +{ + kfree(creq->cache); +} + +static void mv_cesa_ahash_free_cache(struct mv_cesa_ahash_req *creq) +{ + if (!creq->cache) + return; + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ahash_dma_free_cache(creq); + else + mv_cesa_ahash_std_free_cache(creq); + + creq->cache = NULL; +} + +static int mv_cesa_ahash_dma_alloc_padding(struct mv_cesa_ahash_dma_req *req, + gfp_t flags) +{ + if (req->padding) + return 0; + + req->padding = dma_pool_alloc(cesa_dev->dma->padding_pool, flags, + &req->padding_dma); + if (!req->padding) + return -ENOMEM; + + return 0; +} + +static void mv_cesa_ahash_dma_free_padding(struct mv_cesa_ahash_dma_req *req) +{ + if (!req->padding) + return; + + dma_pool_free(cesa_dev->dma->padding_pool, req->padding, + req->padding_dma); + req->padding = NULL; +} + +static inline void mv_cesa_ahash_dma_last_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + mv_cesa_ahash_dma_free_padding(&creq->req.dma); +} + +static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); + mv_cesa_dma_cleanup(&creq->req.dma.base); +} + +static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ahash_dma_cleanup(req); +} + +static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + mv_cesa_ahash_free_cache(creq); + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ahash_dma_last_cleanup(req); +} + +static int mv_cesa_ahash_pad_len(struct mv_cesa_ahash_req *creq) +{ + unsigned int index, padlen; + + index = creq->len & CESA_HASH_BLOCK_SIZE_MSK; + padlen = (index < 56) ? (56 - index) : (64 + 56 - index); + + return padlen; +} + +static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) +{ + __be64 bits = cpu_to_be64(creq->len << 3); + unsigned int index, padlen; + + buf[0] = 0x80; + /* Pad out to 56 mod 64 */ + index = creq->len & CESA_HASH_BLOCK_SIZE_MSK; + padlen = mv_cesa_ahash_pad_len(creq); + memset(buf + 1, 0, padlen - 1); + memcpy(buf + padlen, &bits, sizeof(bits)); + + return padlen + 8; +} + +static void mv_cesa_ahash_std_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_op_ctx *op; + unsigned int new_cache_ptr = 0; + u32 frag_mode; + size_t len; + + if (creq->cache_ptr) + memcpy(engine->sram + CESA_SA_DATA_SRAM_OFFSET, creq->cache, + creq->cache_ptr); + + len = min_t(size_t, req->nbytes + creq->cache_ptr - sreq->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + if (!creq->last_req) { + new_cache_ptr = len & CESA_HASH_BLOCK_SIZE_MSK; + len &= ~CESA_HASH_BLOCK_SIZE_MSK; + } + + if (len - creq->cache_ptr) + sreq->offset += sg_pcopy_to_buffer(req->src, creq->src_nents, + engine->sram + + CESA_SA_DATA_SRAM_OFFSET + + creq->cache_ptr, + len - creq->cache_ptr, + sreq->offset); + + op = &creq->op_tmpl; + + frag_mode = mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK; + + if (creq->last_req && sreq->offset == req->nbytes && + creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) { + if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG) + frag_mode = CESA_SA_DESC_CFG_NOT_FRAG; + else if (frag_mode == CESA_SA_DESC_CFG_MID_FRAG) + frag_mode = CESA_SA_DESC_CFG_LAST_FRAG; + } + + if (frag_mode == CESA_SA_DESC_CFG_NOT_FRAG || + frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) { + if (len && + creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) { + mv_cesa_set_mac_op_total_len(op, creq->len); + } else { + int trailerlen = mv_cesa_ahash_pad_len(creq) + 8; + + if (len + trailerlen > CESA_SA_SRAM_PAYLOAD_SIZE) { + len &= CESA_HASH_BLOCK_SIZE_MSK; + new_cache_ptr = 64 - trailerlen; + memcpy(creq->cache, + engine->sram + + CESA_SA_DATA_SRAM_OFFSET + len, + new_cache_ptr); + } else { + len += mv_cesa_ahash_pad_req(creq, + engine->sram + len + + CESA_SA_DATA_SRAM_OFFSET); + } + + if (frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) + frag_mode = CESA_SA_DESC_CFG_MID_FRAG; + else + frag_mode = CESA_SA_DESC_CFG_FIRST_FRAG; + } + } + + mv_cesa_set_mac_op_frag_len(op, len); + mv_cesa_update_op_cfg(op, frag_mode, CESA_SA_DESC_CFG_FRAG_MSK); + + /* FIXME: only update enc_len field */ + memcpy(engine->sram, op, sizeof(*op)); + + if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG) + mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + creq->cache_ptr = new_cache_ptr; + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); + writel(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + + if (sreq->offset < (req->nbytes - creq->cache_ptr)) + return -EINPROGRESS; + + return 0; +} + +static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + + mv_cesa_dma_prepare(dreq, dreq->base.engine); +} + +static void mv_cesa_ahash_std_prepare(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = sreq->base.engine; + + sreq->offset = 0; + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); +} + +static void mv_cesa_ahash_step(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->req.dma.base); + else + mv_cesa_ahash_std_step(ahashreq); +} + +static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->req.base.engine; + unsigned int digsize; + int ret, i; + + if (creq->req.base.type == CESA_DMA_REQ) + ret = mv_cesa_dma_process(&creq->req.dma.base, status); + else + ret = mv_cesa_ahash_std_process(ahashreq, status); + + if (ret == -EINPROGRESS) + return ret; + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); + for (i = 0; i < digsize / 4; i++) + creq->state[i] = readl(engine->regs + CESA_IVDIG(i)); + + if (creq->cache_ptr) + sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, + creq->cache, + creq->cache_ptr, + ahashreq->nbytes - creq->cache_ptr); + + if (creq->last_req) { + for (i = 0; i < digsize / 4; i++) { + /* + * Hardware provides MD5 digest in a different + * endianness than SHA-1 and SHA-256 ones. + */ + if (digsize == MD5_DIGEST_SIZE) + creq->state[i] = cpu_to_le32(creq->state[i]); + else + creq->state[i] = cpu_to_be32(creq->state[i]); + } + + memcpy(ahashreq->result, creq->state, digsize); + } + + return ret; +} + +static void mv_cesa_ahash_prepare(struct crypto_async_request *req, + struct mv_cesa_engine *engine) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + unsigned int digsize; + int i; + + creq->req.base.engine = engine; + + if (creq->req.base.type == CESA_DMA_REQ) + mv_cesa_ahash_dma_prepare(ahashreq); + else + mv_cesa_ahash_std_prepare(ahashreq); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); + for (i = 0; i < digsize / 4; i++) + writel(creq->state[i], + engine->regs + CESA_IVDIG(i)); +} + +static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + if (creq->last_req) + mv_cesa_ahash_last_cleanup(ahashreq); + + mv_cesa_ahash_cleanup(ahashreq); +} + +static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { + .step = mv_cesa_ahash_step, + .process = mv_cesa_ahash_process, + .prepare = mv_cesa_ahash_prepare, + .cleanup = mv_cesa_ahash_req_cleanup, +}; + +static int mv_cesa_ahash_init(struct ahash_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + memset(creq, 0, sizeof(*creq)); + mv_cesa_update_op_cfg(tmpl, + CESA_SA_DESC_CFG_OP_MAC_ONLY | + CESA_SA_DESC_CFG_FIRST_FRAG, + CESA_SA_DESC_CFG_OP_MSK | + CESA_SA_DESC_CFG_FRAG_MSK); + mv_cesa_set_mac_op_total_len(tmpl, 0); + mv_cesa_set_mac_op_frag_len(tmpl, 0); + creq->op_tmpl = *tmpl; + creq->len = 0; + + return 0; +} + +static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->base.ops = &mv_cesa_ahash_req_ops; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_cesa_ahash_req)); + return 0; +} + +static int mv_cesa_ahash_cache_req(struct ahash_request *req, bool *cached) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + int ret; + + if (((creq->cache_ptr + req->nbytes) & CESA_HASH_BLOCK_SIZE_MSK) && + !creq->last_req) { + ret = mv_cesa_ahash_alloc_cache(req); + if (ret) + return ret; + } + + if (creq->cache_ptr + req->nbytes < 64 && !creq->last_req) { + *cached = true; + + if (!req->nbytes) + return 0; + + sg_pcopy_to_buffer(req->src, creq->src_nents, + creq->cache + creq->cache_ptr, + req->nbytes, 0); + + creq->cache_ptr += req->nbytes; + } + + return 0; +} + +static struct mv_cesa_op_ctx * +mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_ahash_dma_iter *dma_iter, + struct mv_cesa_ahash_req *creq, + gfp_t flags) +{ + struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; + struct mv_cesa_op_ctx *op = NULL; + int ret; + + if (!creq->cache_ptr) + return NULL; + + ret = mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET, + ahashdreq->cache_dma, + creq->cache_ptr, + CESA_TDMA_DST_IN_SRAM, + flags); + if (ret) + return ERR_PTR(ret); + + if (!dma_iter->base.op_len) { + op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags); + if (IS_ERR(op)) + return op; + + mv_cesa_set_mac_op_frag_len(op, creq->cache_ptr); + + /* Add dummy desc to launch crypto operation */ + ret = mv_cesa_dma_add_dummy_launch(chain, flags); + if (ret) + return ERR_PTR(ret); + } + + return op; +} + +static struct mv_cesa_op_ctx * +mv_cesa_ahash_dma_add_data(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_ahash_dma_iter *dma_iter, + struct mv_cesa_ahash_req *creq, + gfp_t flags) +{ + struct mv_cesa_op_ctx *op; + int ret; + + op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags); + if (IS_ERR(op)) + return op; + + mv_cesa_set_mac_op_frag_len(op, dma_iter->base.op_len); + + if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) == + CESA_SA_DESC_CFG_FIRST_FRAG) + mv_cesa_update_op_cfg(&creq->op_tmpl, + CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + /* Add input transfers */ + ret = mv_cesa_dma_add_op_transfers(chain, &dma_iter->base, + &dma_iter->src, flags); + if (ret) + return ERR_PTR(ret); + + /* Add dummy desc to launch crypto operation */ + ret = mv_cesa_dma_add_dummy_launch(chain, flags); + if (ret) + return ERR_PTR(ret); + + return op; +} + +static struct mv_cesa_op_ctx * +mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_ahash_dma_iter *dma_iter, + struct mv_cesa_ahash_req *creq, + struct mv_cesa_op_ctx *op, + gfp_t flags) +{ + struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; + unsigned int len, trailerlen, padoff = 0; + int ret; + + if (!creq->last_req) + return op; + + if (op && creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) { + u32 frag = CESA_SA_DESC_CFG_NOT_FRAG; + + if ((mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK) != + CESA_SA_DESC_CFG_FIRST_FRAG) + frag = CESA_SA_DESC_CFG_LAST_FRAG; + + mv_cesa_update_op_cfg(op, frag, CESA_SA_DESC_CFG_FRAG_MSK); + + return op; + } + + ret = mv_cesa_ahash_dma_alloc_padding(ahashdreq, flags); + if (ret) + return ERR_PTR(ret); + + trailerlen = mv_cesa_ahash_pad_req(creq, ahashdreq->padding); + + if (op) { + len = min(CESA_SA_SRAM_PAYLOAD_SIZE - dma_iter->base.op_len, + trailerlen); + if (len) { + ret = mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET + + dma_iter->base.op_len, + ahashdreq->padding_dma, + len, CESA_TDMA_DST_IN_SRAM, + flags); + if (ret) + return ERR_PTR(ret); + + mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + mv_cesa_set_mac_op_frag_len(op, + dma_iter->base.op_len + len); + padoff += len; + } + } + + if (padoff >= trailerlen) + return op; + + if ((mv_cesa_get_op_cfg(&creq->op_tmpl) & CESA_SA_DESC_CFG_FRAG_MSK) != + CESA_SA_DESC_CFG_FIRST_FRAG) + mv_cesa_update_op_cfg(&creq->op_tmpl, + CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + op = mv_cesa_dma_add_op(chain, &creq->op_tmpl, false, flags); + if (IS_ERR(op)) + return op; + + mv_cesa_set_mac_op_frag_len(op, trailerlen - padoff); + + ret = mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET, + ahashdreq->padding_dma + + padoff, + trailerlen - padoff, + CESA_TDMA_DST_IN_SRAM, + flags); + if (ret) + return ERR_PTR(ret); + + /* Add dummy desc to launch crypto operation */ + ret = mv_cesa_dma_add_dummy_launch(chain, flags); + if (ret) + return ERR_PTR(ret); + + return op; +} + +static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; + struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_tdma_chain chain; + struct mv_cesa_ahash_dma_iter iter; + struct mv_cesa_op_ctx *op = NULL; + int ret; + + dreq->chain.first = NULL; + dreq->chain.last = NULL; + + if (creq->src_nents) { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + if (!ret) { + ret = -ENOMEM; + goto err; + } + } + + mv_cesa_tdma_desc_iter_init(&chain); + mv_cesa_ahash_req_iter_init(&iter, req); + + op = mv_cesa_ahash_dma_add_cache(&chain, &iter, + creq, flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + + do { + if (!iter.base.op_len) + break; + + op = mv_cesa_ahash_dma_add_data(&chain, &iter, + creq, flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + } while (mv_cesa_ahash_req_iter_next_op(&iter)); + + op = mv_cesa_ahash_dma_last_req(&chain, &iter, creq, op, flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + + if (op) { + /* Add dummy desc to wait for crypto operation end */ + ret = mv_cesa_dma_add_dummy_end(&chain, flags); + if (ret) + goto err_free_tdma; + } + + if (!creq->last_req) + creq->cache_ptr = req->nbytes + creq->cache_ptr - + iter.base.len; + else + creq->cache_ptr = 0; + + dreq->chain = chain; + + return 0; + +err_free_tdma: + mv_cesa_dma_cleanup(dreq); + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); + +err: + mv_cesa_ahash_last_cleanup(req); + + return ret; +} + +static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + int ret; + + if (cesa_dev->caps->has_tdma) + creq->req.base.type = CESA_DMA_REQ; + else + creq->req.base.type = CESA_STD_REQ; + + creq->src_nents = sg_nents_for_len(req->src, req->nbytes); + + ret = mv_cesa_ahash_cache_req(req, cached); + if (ret) + return ret; + + if (*cached) + return 0; + + if (creq->req.base.type == CESA_DMA_REQ) + ret = mv_cesa_ahash_dma_req_init(req); + + return ret; +} + +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + bool cached = false; + int ret; + + creq->len += req->nbytes; + ret = mv_cesa_ahash_req_init(req, &cached); + if (ret) + return ret; + + if (cached) + return 0; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) { + mv_cesa_ahash_cleanup(req); + return ret; + } + + return ret; +} + +static int mv_cesa_ahash_final(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; + bool cached = false; + int ret; + + mv_cesa_set_mac_op_total_len(tmpl, creq->len); + creq->last_req = true; + req->nbytes = 0; + + ret = mv_cesa_ahash_req_init(req, &cached); + if (ret) + return ret; + + if (cached) + return 0; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) + mv_cesa_ahash_cleanup(req); + + return ret; +} + +static int mv_cesa_ahash_finup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; + bool cached = false; + int ret; + + creq->len += req->nbytes; + mv_cesa_set_mac_op_total_len(tmpl, creq->len); + creq->last_req = true; + + ret = mv_cesa_ahash_req_init(req, &cached); + if (ret) + return ret; + + if (cached) + return 0; + + ret = mv_cesa_queue_req(&req->base); + if (ret && ret != -EINPROGRESS) + mv_cesa_ahash_cleanup(req); + + return ret; +} + +static int mv_cesa_md5_init(struct ahash_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_md5_export(struct ahash_request *req, void *out) +{ + struct md5_state *out_state = out; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + + out_state->byte_count = creq->len; + memcpy(out_state->hash, creq->state, digsize); + memset(out_state->block, 0, sizeof(out_state->block)); + if (creq->cache) + memcpy(out_state->block, creq->cache, creq->cache_ptr); + + return 0; +} + +static int mv_cesa_md5_import(struct ahash_request *req, const void *in) +{ + const struct md5_state *in_state = in; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + unsigned int cache_ptr; + int ret; + + creq->len = in_state->byte_count; + memcpy(creq->state, in_state->hash, digsize); + creq->cache_ptr = 0; + + cache_ptr = creq->len % sizeof(in_state->block); + if (!cache_ptr) + return 0; + + ret = mv_cesa_ahash_alloc_cache(req); + if (ret) + return ret; + + memcpy(creq->cache, in_state->block, cache_ptr); + creq->cache_ptr = cache_ptr; + + return 0; +} + +static int mv_cesa_md5_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_md5_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_md5_alg = { + .init = mv_cesa_md5_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_md5_digest, + .export = mv_cesa_md5_export, + .import = mv_cesa_md5_import, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .base = { + .cra_name = "md5", + .cra_driver_name = "mv-md5", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_sha1_init(struct ahash_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_sha1_export(struct ahash_request *req, void *out) +{ + struct sha1_state *out_state = out; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + + out_state->count = creq->len; + memcpy(out_state->state, creq->state, digsize); + memset(out_state->buffer, 0, sizeof(out_state->buffer)); + if (creq->cache) + memcpy(out_state->buffer, creq->cache, creq->cache_ptr); + + return 0; +} + +static int mv_cesa_sha1_import(struct ahash_request *req, const void *in) +{ + const struct sha1_state *in_state = in; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + unsigned int cache_ptr; + int ret; + + creq->len = in_state->count; + memcpy(creq->state, in_state->state, digsize); + creq->cache_ptr = 0; + + cache_ptr = creq->len % SHA1_BLOCK_SIZE; + if (!cache_ptr) + return 0; + + ret = mv_cesa_ahash_alloc_cache(req); + if (ret) + return ret; + + memcpy(creq->cache, in_state->buffer, cache_ptr); + creq->cache_ptr = cache_ptr; + + return 0; +} + +static int mv_cesa_sha1_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_sha1_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_sha1_alg = { + .init = mv_cesa_sha1_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_sha1_digest, + .export = mv_cesa_sha1_export, + .import = mv_cesa_sha1_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .base = { + .cra_name = "sha1", + .cra_driver_name = "mv-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_sha256_init(struct ahash_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_sha256_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_sha256_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +static int mv_cesa_sha256_export(struct ahash_request *req, void *out) +{ + struct sha256_state *out_state = out; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int ds = crypto_ahash_digestsize(ahash); + + out_state->count = creq->len; + memcpy(out_state->state, creq->state, ds); + memset(out_state->buf, 0, sizeof(out_state->buf)); + if (creq->cache) + memcpy(out_state->buf, creq->cache, creq->cache_ptr); + + return 0; +} + +static int mv_cesa_sha256_import(struct ahash_request *req, const void *in) +{ + const struct sha256_state *in_state = in; + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + unsigned int cache_ptr; + int ret; + + creq->len = in_state->count; + memcpy(creq->state, in_state->state, digsize); + creq->cache_ptr = 0; + + cache_ptr = creq->len % SHA256_BLOCK_SIZE; + if (!cache_ptr) + return 0; + + ret = mv_cesa_ahash_alloc_cache(req); + if (ret) + return ret; + + memcpy(creq->cache, in_state->buf, cache_ptr); + creq->cache_ptr = cache_ptr; + + return 0; +} + +struct ahash_alg mv_sha256_alg = { + .init = mv_cesa_sha256_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_sha256_digest, + .export = mv_cesa_sha256_export, + .import = mv_cesa_sha256_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .base = { + .cra_name = "sha256", + .cra_driver_name = "mv-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +struct mv_cesa_ahash_result { + struct completion completion; + int error; +}; + +static void mv_cesa_hmac_ahash_complete(struct crypto_async_request *req, + int error) +{ + struct mv_cesa_ahash_result *result = req->data; + + if (error == -EINPROGRESS) + return; + + result->error = error; + complete(&result->completion); +} + +static int mv_cesa_ahmac_iv_state_init(struct ahash_request *req, u8 *pad, + void *state, unsigned int blocksize) +{ + struct mv_cesa_ahash_result result; + struct scatterlist sg; + int ret; + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + mv_cesa_hmac_ahash_complete, &result); + sg_init_one(&sg, pad, blocksize); + ahash_request_set_crypt(req, &sg, pad, blocksize); + init_completion(&result.completion); + + ret = crypto_ahash_init(req); + if (ret) + return ret; + + ret = crypto_ahash_update(req); + if (ret && ret != -EINPROGRESS) + return ret; + + wait_for_completion_interruptible(&result.completion); + if (result.error) + return result.error; + + ret = crypto_ahash_export(req, state); + if (ret) + return ret; + + return 0; +} + +static int mv_cesa_ahmac_pad_init(struct ahash_request *req, + const u8 *key, unsigned int keylen, + u8 *ipad, u8 *opad, + unsigned int blocksize) +{ + struct mv_cesa_ahash_result result; + struct scatterlist sg; + int ret; + int i; + + if (keylen <= blocksize) { + memcpy(ipad, key, keylen); + } else { + u8 *keydup = kmemdup(key, keylen, GFP_KERNEL); + + if (!keydup) + return -ENOMEM; + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + mv_cesa_hmac_ahash_complete, + &result); + sg_init_one(&sg, keydup, keylen); + ahash_request_set_crypt(req, &sg, ipad, keylen); + init_completion(&result.completion); + + ret = crypto_ahash_digest(req); + if (ret == -EINPROGRESS) { + wait_for_completion_interruptible(&result.completion); + ret = result.error; + } + + /* Set the memory region to 0 to avoid any leak. */ + memset(keydup, 0, keylen); + kfree(keydup); + + if (ret) + return ret; + + keylen = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + } + + memset(ipad + keylen, 0, blocksize - keylen); + memcpy(opad, ipad, blocksize); + + for (i = 0; i < blocksize; i++) { + ipad[i] ^= 0x36; + opad[i] ^= 0x5c; + } + + return 0; +} + +static int mv_cesa_ahmac_setkey(const char *hash_alg_name, + const u8 *key, unsigned int keylen, + void *istate, void *ostate) +{ + struct ahash_request *req; + struct crypto_ahash *tfm; + unsigned int blocksize; + u8 *ipad = NULL; + u8 *opad; + int ret; + + tfm = crypto_alloc_ahash(hash_alg_name, CRYPTO_ALG_TYPE_AHASH, + CRYPTO_ALG_TYPE_AHASH_MASK); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto free_ahash; + } + + crypto_ahash_clear_flags(tfm, ~0); + + blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + ipad = kzalloc(2 * blocksize, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto free_req; + } + + opad = ipad + blocksize; + + ret = mv_cesa_ahmac_pad_init(req, key, keylen, ipad, opad, blocksize); + if (ret) + goto free_ipad; + + ret = mv_cesa_ahmac_iv_state_init(req, ipad, istate, blocksize); + if (ret) + goto free_ipad; + + ret = mv_cesa_ahmac_iv_state_init(req, opad, ostate, blocksize); + +free_ipad: + kfree(ipad); +free_req: + ahash_request_free(req); +free_ahash: + crypto_free_ahash(tfm); + + return ret; +} + +static int mv_cesa_ahmac_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->base.ops = &mv_cesa_ahash_req_ops; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_cesa_ahash_req)); + return 0; +} + +static int mv_cesa_ahmac_md5_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_MD5); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_ahmac_md5_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct md5_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-md5", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.hash); i++) + ctx->iv[i] = be32_to_cpu(istate.hash[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.hash); i++) + ctx->iv[i + 8] = be32_to_cpu(ostate.hash[i]); + + return 0; +} + +static int mv_cesa_ahmac_md5_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_md5_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_md5_alg = { + .init = mv_cesa_ahmac_md5_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_md5_digest, + .setkey = mv_cesa_ahmac_md5_setkey, + .export = mv_cesa_md5_export, + .import = mv_cesa_md5_import, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "mv-hmac-md5", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_ahmac_sha1_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA1); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_ahmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct sha1_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-sha1", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.state); i++) + ctx->iv[i] = be32_to_cpu(istate.state[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.state); i++) + ctx->iv[i + 8] = be32_to_cpu(ostate.state[i]); + + return 0; +} + +static int mv_cesa_ahmac_sha1_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_sha1_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_sha1_alg = { + .init = mv_cesa_ahmac_sha1_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_sha1_digest, + .setkey = mv_cesa_ahmac_sha1_setkey, + .export = mv_cesa_sha1_export, + .import = mv_cesa_sha1_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mv-hmac-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_ahmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct sha256_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-sha256", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.state); i++) + ctx->iv[i] = be32_to_cpu(istate.state[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.state); i++) + ctx->iv[i + 8] = be32_to_cpu(ostate.state[i]); + + return 0; +} + +static int mv_cesa_ahmac_sha256_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA256); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl); + + return 0; +} + +static int mv_cesa_ahmac_sha256_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_sha256_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_sha256_alg = { + .init = mv_cesa_ahmac_sha256_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_sha256_digest, + .setkey = mv_cesa_ahmac_sha256_setkey, + .export = mv_cesa_sha256_export, + .import = mv_cesa_sha256_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mv-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c new file mode 100644 index 0000000..64a366c --- /dev/null +++ b/drivers/crypto/marvell/tdma.c @@ -0,0 +1,224 @@ +/* + * Provide TDMA helper functions used by cipher and hash algorithm + * implementations. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include "cesa.h" + +bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, + struct mv_cesa_sg_dma_iter *sgiter, + unsigned int len) +{ + if (!sgiter->sg) + return false; + + sgiter->op_offset += len; + sgiter->offset += len; + if (sgiter->offset == sg_dma_len(sgiter->sg)) { + if (sg_is_last(sgiter->sg)) + return false; + sgiter->offset = 0; + sgiter->sg = sg_next(sgiter->sg); + } + + if (sgiter->op_offset == iter->op_len) + return false; + + return true; +} + +void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +{ + struct mv_cesa_engine *engine = dreq->base.engine; + + writel(0, engine->regs + CESA_SA_CFG); + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE); + writel(CESA_TDMA_DST_BURST_128B | CESA_TDMA_SRC_BURST_128B | + CESA_TDMA_NO_BYTE_SWAP | CESA_TDMA_EN, + engine->regs + CESA_TDMA_CONTROL); + + writel(CESA_SA_CFG_ACT_CH0_IDMA | CESA_SA_CFG_MULTI_PKT | + CESA_SA_CFG_CH0_W_IDMA | CESA_SA_CFG_PARA_DIS, + engine->regs + CESA_SA_CFG); + writel(dreq->chain.first->cur_dma, + engine->regs + CESA_TDMA_NEXT_ADDR); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +{ + struct mv_cesa_tdma_desc *tdma; + + for (tdma = dreq->chain.first; tdma;) { + struct mv_cesa_tdma_desc *old_tdma = tdma; + + if (tdma->flags & CESA_TDMA_OP) + dma_pool_free(cesa_dev->dma->op_pool, tdma->op, + le32_to_cpu(tdma->src)); + + tdma = tdma->next; + dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, + le32_to_cpu(old_tdma->cur_dma)); + } + + dreq->chain.first = NULL; + dreq->chain.last = NULL; +} + +void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, + struct mv_cesa_engine *engine) +{ + struct mv_cesa_tdma_desc *tdma; + + for (tdma = dreq->chain.first; tdma; tdma = tdma->next) { + if (tdma->flags & CESA_TDMA_DST_IN_SRAM) + tdma->dst = cpu_to_le32(tdma->dst + engine->sram_dma); + + if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) + tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); + + if (tdma->flags & CESA_TDMA_OP) + mv_cesa_adjust_op(engine, tdma->op); + } +} + +static struct mv_cesa_tdma_desc * +mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) +{ + struct mv_cesa_tdma_desc *new_tdma = NULL; + dma_addr_t dma_handle; + + new_tdma = dma_pool_alloc(cesa_dev->dma->tdma_desc_pool, flags, + &dma_handle); + if (!new_tdma) + return ERR_PTR(-ENOMEM); + + memset(new_tdma, 0, sizeof(*new_tdma)); + new_tdma->cur_dma = cpu_to_le32(dma_handle); + if (chain->last) { + chain->last->next_dma = new_tdma->cur_dma; + chain->last->next = new_tdma; + } else { + chain->first = new_tdma; + } + + chain->last = new_tdma; + + return new_tdma; +} + +struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, + const struct mv_cesa_op_ctx *op_templ, + bool skip_ctx, + gfp_t flags) +{ + struct mv_cesa_tdma_desc *tdma; + struct mv_cesa_op_ctx *op; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, flags); + if (IS_ERR(tdma)) + return ERR_CAST(tdma); + + op = dma_pool_alloc(cesa_dev->dma->op_pool, flags, &dma_handle); + if (!op) + return ERR_PTR(-ENOMEM); + + *op = *op_templ; + + tdma = chain->last; + tdma->op = op; + tdma->byte_cnt = (skip_ctx ? sizeof(op->desc) : sizeof(*op)) | BIT(31); + tdma->src = dma_handle; + tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP; + + return op; +} + +int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain, + dma_addr_t dst, dma_addr_t src, u32 size, + u32 flags, gfp_t gfp_flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + tdma->byte_cnt = size | BIT(31); + tdma->src = src; + tdma->dst = dst; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_DATA; + + return 0; +} + +int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, + u32 flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + return 0; +} + +int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, u32 flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + tdma->byte_cnt = BIT(31); + + return 0; +} + +int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_dma_iter *dma_iter, + struct mv_cesa_sg_dma_iter *sgiter, + gfp_t gfp_flags) +{ + u32 flags = sgiter->dir == DMA_TO_DEVICE ? + CESA_TDMA_DST_IN_SRAM : CESA_TDMA_SRC_IN_SRAM; + unsigned int len; + + do { + dma_addr_t dst, src; + int ret; + + len = mv_cesa_req_dma_iter_transfer_len(dma_iter, sgiter); + if (sgiter->dir == DMA_TO_DEVICE) { + dst = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset; + src = sg_dma_address(sgiter->sg) + sgiter->offset; + } else { + dst = sg_dma_address(sgiter->sg) + sgiter->offset; + src = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset; + } + + ret = mv_cesa_dma_add_data_transfer(chain, dst, src, len, + flags, gfp_flags); + if (ret) + return ret; + + } while (mv_cesa_req_dma_iter_next_transfer(dma_iter, sgiter, len)); + + return 0; +} diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index f91f15d..5bcd575 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -9,6 +9,7 @@ #include <crypto/aes.h> #include <crypto/algapi.h> #include <linux/crypto.h> +#include <linux/genalloc.h> #include <linux/interrupt.h> #include <linux/io.h> #include <linux/kthread.h> @@ -29,6 +30,8 @@ #define MAX_HW_HASH_SIZE 0xFFFF #define MV_CESA_EXPIRE 500 /* msec */ +#define MV_CESA_DEFAULT_SRAM_SIZE 2048 + /* * STM: * /---------------------------------------\ @@ -83,6 +86,8 @@ struct req_progress { struct crypto_priv { void __iomem *reg; void __iomem *sram; + struct gen_pool *sram_pool; + dma_addr_t sram_dma; int irq; struct clk *clk; struct task_struct *queue_th; @@ -595,7 +600,7 @@ static int queue_manag(void *data) cpg->eng_st = ENGINE_IDLE; do { struct crypto_async_request *async_req = NULL; - struct crypto_async_request *backlog; + struct crypto_async_request *backlog = NULL; __set_current_state(TASK_INTERRUPTIBLE); @@ -1019,6 +1024,39 @@ static struct ahash_alg mv_hmac_sha1_alg = { } }; +static int mv_cesa_get_sram(struct platform_device *pdev, + struct crypto_priv *cp) +{ + struct resource *res; + u32 sram_size = MV_CESA_DEFAULT_SRAM_SIZE; + + of_property_read_u32(pdev->dev.of_node, "marvell,crypto-sram-size", + &sram_size); + + cp->sram_size = sram_size; + cp->sram_pool = of_get_named_gen_pool(pdev->dev.of_node, + "marvell,crypto-srams", 0); + if (cp->sram_pool) { + cp->sram = gen_pool_dma_alloc(cp->sram_pool, sram_size, + &cp->sram_dma); + if (cp->sram) + return 0; + + return -ENOMEM; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "sram"); + if (!res || resource_size(res) < cp->sram_size) + return -EINVAL; + + cp->sram = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cp->sram)) + return PTR_ERR(cp->sram); + + return 0; +} + static int mv_probe(struct platform_device *pdev) { struct crypto_priv *cp; @@ -1041,24 +1079,17 @@ static int mv_probe(struct platform_device *pdev) spin_lock_init(&cp->lock); crypto_init_queue(&cp->queue, 50); - cp->reg = ioremap(res->start, resource_size(res)); - if (!cp->reg) { - ret = -ENOMEM; + cp->reg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cp->reg)) { + ret = PTR_ERR(cp->reg); goto err; } - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram"); - if (!res) { - ret = -ENXIO; - goto err_unmap_reg; - } - cp->sram_size = resource_size(res); + ret = mv_cesa_get_sram(pdev, cp); + if (ret) + goto err; + cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE; - cp->sram = ioremap(res->start, cp->sram_size); - if (!cp->sram) { - ret = -ENOMEM; - goto err_unmap_reg; - } if (pdev->dev.of_node) irq = irq_of_parse_and_map(pdev->dev.of_node, 0); @@ -1066,7 +1097,7 @@ static int mv_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); if (irq < 0 || irq == NO_IRQ) { ret = irq; - goto err_unmap_sram; + goto err; } cp->irq = irq; @@ -1076,7 +1107,7 @@ static int mv_probe(struct platform_device *pdev) cp->queue_th = kthread_run(queue_manag, cp, "mv_crypto"); if (IS_ERR(cp->queue_th)) { ret = PTR_ERR(cp->queue_th); - goto err_unmap_sram; + goto err; } ret = request_irq(irq, crypto_int, 0, dev_name(&pdev->dev), @@ -1134,10 +1165,6 @@ err_irq: } err_thread: kthread_stop(cp->queue_th); -err_unmap_sram: - iounmap(cp->sram); -err_unmap_reg: - iounmap(cp->reg); err: kfree(cp); cpg = NULL; @@ -1157,8 +1184,6 @@ static int mv_remove(struct platform_device *pdev) kthread_stop(cp->queue_th); free_irq(cp->irq, cp); memset(cp->sram, 0, cp->sram_size); - iounmap(cp->sram); - iounmap(cp->reg); if (!IS_ERR(cp->clk)) { clk_disable_unprepare(cp->clk); @@ -1172,6 +1197,8 @@ static int mv_remove(struct platform_device *pdev) static const struct of_device_id mv_cesa_of_match_table[] = { { .compatible = "marvell,orion-crypto", }, + { .compatible = "marvell,kirkwood-crypto", }, + { .compatible = "marvell,dove-crypto", }, {} }; MODULE_DEVICE_TABLE(of, mv_cesa_of_match_table); diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 10a9aef..2e8dab9 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1281,10 +1281,10 @@ static const char md5_zero[MD5_DIGEST_SIZE] = { 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, }; static const u32 md5_init[MD5_HASH_WORDS] = { - cpu_to_le32(0x67452301), - cpu_to_le32(0xefcdab89), - cpu_to_le32(0x98badcfe), - cpu_to_le32(0x10325476), + cpu_to_le32(MD5_H0), + cpu_to_le32(MD5_H1), + cpu_to_le32(MD5_H2), + cpu_to_le32(MD5_H3), }; static const char sha1_zero[SHA1_DIGEST_SIZE] = { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig index f826166..e421c96 100644 --- a/drivers/crypto/nx/Kconfig +++ b/drivers/crypto/nx/Kconfig @@ -1,26 +1,55 @@ + config CRYPTO_DEV_NX_ENCRYPT - tristate "Encryption acceleration support" - depends on PPC64 && IBMVIO + tristate "Encryption acceleration support on pSeries platform" + depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN default y select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB select CRYPTO_CCM - select CRYPTO_GCM - select CRYPTO_AUTHENC - select CRYPTO_XCBC - select CRYPTO_SHA256 - select CRYPTO_SHA512 help - Support for Power7+ in-Nest encryption acceleration. This - module supports acceleration for AES and SHA2 algorithms. If you - choose 'M' here, this module will be called nx_crypto. + Support for PowerPC Nest (NX) encryption acceleration. This + module supports acceleration for AES and SHA2 algorithms on + the pSeries platform. If you choose 'M' here, this module + will be called nx_crypto. config CRYPTO_DEV_NX_COMPRESS tristate "Compression acceleration support" - depends on PPC64 && IBMVIO default y help - Support for Power7+ in-Nest compression acceleration. This - module supports acceleration for AES and SHA2 algorithms. If you - choose 'M' here, this module will be called nx_compress. + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. One of the platform drivers must be selected also. + If you choose 'M' here, this module will be called nx_compress. + +if CRYPTO_DEV_NX_COMPRESS + +config CRYPTO_DEV_NX_COMPRESS_PSERIES + tristate "Compression acceleration support on pSeries platform" + depends on PPC_PSERIES && IBMVIO + default y + help + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. This supports NX hardware on the pSeries platform. + If you choose 'M' here, this module will be called nx_compress_pseries. + +config CRYPTO_DEV_NX_COMPRESS_POWERNV + tristate "Compression acceleration support on PowerNV platform" + depends on PPC_POWERNV + default y + help + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. This supports NX hardware on the PowerNV platform. + If you choose 'M' here, this module will be called nx_compress_powernv. + +config CRYPTO_DEV_NX_COMPRESS_CRYPTO + tristate "Compression acceleration cryptographic interface" + select CRYPTO_ALGAPI + select 842_DECOMPRESS + default y + help + Support for PowerPC Nest (NX) accelerators using the cryptographic + API. If you choose 'M' here, this module will be called + nx_compress_crypto. + +endif diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bb770ea..e1684f5 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -10,5 +10,12 @@ nx-crypto-objs := nx.o \ nx-sha256.o \ nx-sha512.o -obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o nx-compress-platform.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o nx-compress-objs := nx-842.o +nx-compress-platform-objs := nx-842-platform.o +nx-compress-pseries-objs := nx-842-pseries.o +nx-compress-powernv-objs := nx-842-powernv.o +nx-compress-crypto-objs := nx-842-crypto.o diff --git a/drivers/crypto/nx/nx-842-crypto.c b/drivers/crypto/nx/nx-842-crypto.c new file mode 100644 index 0000000..d53a1dc --- /dev/null +++ b/drivers/crypto/nx/nx-842-crypto.c @@ -0,0 +1,580 @@ +/* + * Cryptographic API for the NX-842 hardware compression. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) IBM Corporation, 2011-2015 + * + * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> + * + * Rewrite: Dan Streetman <ddstreet@ieee.org> + * + * This is an interface to the NX-842 compression hardware in PowerPC + * processors. Most of the complexity of this drvier is due to the fact that + * the NX-842 compression hardware requires the input and output data buffers + * to be specifically aligned, to be a specific multiple in length, and within + * specific minimum and maximum lengths. Those restrictions, provided by the + * nx-842 driver via nx842_constraints, mean this driver must use bounce + * buffers and headers to correct misaligned in or out buffers, and to split + * input buffers that are too large. + * + * This driver will fall back to software decompression if the hardware + * decompression fails, so this driver's decompression should never fail as + * long as the provided compressed buffer is valid. Any compressed buffer + * created by this driver will have a header (except ones where the input + * perfectly matches the constraints); so users of this driver cannot simply + * pass a compressed buffer created by this driver over to the 842 software + * decompression library. Instead, users must use this driver to decompress; + * if the hardware fails or is unavailable, the compressed buffer will be + * parsed and the header removed, and the raw 842 buffer(s) passed to the 842 + * software decompression library. + * + * This does not fall back to software compression, however, since the caller + * of this function is specifically requesting hardware compression; if the + * hardware compression fails, the caller can fall back to software + * compression, and the raw 842 compressed buffer that the software compressor + * creates can be passed to this driver for hardware decompression; any + * buffer without our specific header magic is assumed to be a raw 842 buffer + * and passed directly to the hardware. Note that the software compression + * library will produce a compressed buffer that is incompatible with the + * hardware decompressor if the original input buffer length is not a multiple + * of 8; if such a compressed buffer is passed to this driver for + * decompression, the hardware will reject it and this driver will then pass + * it over to the software library for decompression. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/sw842.h> +#include <linux/ratelimit.h> + +#include "nx-842.h" + +/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit + * template (see lib/842/842.h), so this magic number will never appear at + * the start of a raw 842 compressed buffer. That is important, as any buffer + * passed to us without this magic is assumed to be a raw 842 compressed + * buffer, and passed directly to the hardware to decompress. + */ +#define NX842_CRYPTO_MAGIC (0xf842) +#define NX842_CRYPTO_GROUP_MAX (0x20) +#define NX842_CRYPTO_HEADER_SIZE(g) \ + (sizeof(struct nx842_crypto_header) + \ + sizeof(struct nx842_crypto_header_group) * (g)) +#define NX842_CRYPTO_HEADER_MAX_SIZE \ + NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX) + +/* bounce buffer size */ +#define BOUNCE_BUFFER_ORDER (2) +#define BOUNCE_BUFFER_SIZE \ + ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER)) + +/* try longer on comp because we can fallback to sw decomp if hw is busy */ +#define COMP_BUSY_TIMEOUT (250) /* ms */ +#define DECOMP_BUSY_TIMEOUT (50) /* ms */ + +struct nx842_crypto_header_group { + __be16 padding; /* unused bytes at start of group */ + __be32 compressed_length; /* compressed bytes in group */ + __be32 uncompressed_length; /* bytes after decompression */ +} __packed; + +struct nx842_crypto_header { + __be16 magic; /* NX842_CRYPTO_MAGIC */ + __be16 ignore; /* decompressed end bytes to ignore */ + u8 groups; /* total groups in this header */ + struct nx842_crypto_header_group group[]; +} __packed; + +struct nx842_crypto_param { + u8 *in; + unsigned int iremain; + u8 *out; + unsigned int oremain; + unsigned int ototal; +}; + +static int update_param(struct nx842_crypto_param *p, + unsigned int slen, unsigned int dlen) +{ + if (p->iremain < slen) + return -EOVERFLOW; + if (p->oremain < dlen) + return -ENOSPC; + + p->in += slen; + p->iremain -= slen; + p->out += dlen; + p->oremain -= dlen; + p->ototal += dlen; + + return 0; +} + +struct nx842_crypto_ctx { + u8 *wmem; + u8 *sbounce, *dbounce; + + struct nx842_crypto_header header; + struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; +}; + +static int nx842_crypto_init(struct crypto_tfm *tfm) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->wmem = kmalloc(nx842_workmem_size(), GFP_KERNEL); + ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); + ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); + if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { + kfree(ctx->wmem); + free_page((unsigned long)ctx->sbounce); + free_page((unsigned long)ctx->dbounce); + return -ENOMEM; + } + + return 0; +} + +static void nx842_crypto_exit(struct crypto_tfm *tfm) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + + kfree(ctx->wmem); + free_page((unsigned long)ctx->sbounce); + free_page((unsigned long)ctx->dbounce); +} + +static int read_constraints(struct nx842_constraints *c) +{ + int ret; + + ret = nx842_constraints(c); + if (ret) { + pr_err_ratelimited("could not get nx842 constraints : %d\n", + ret); + return ret; + } + + /* limit maximum, to always have enough bounce buffer to decompress */ + if (c->maximum > BOUNCE_BUFFER_SIZE) { + c->maximum = BOUNCE_BUFFER_SIZE; + pr_info_once("limiting nx842 maximum to %x\n", c->maximum); + } + + return 0; +} + +static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf) +{ + int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups); + + /* compress should have added space for header */ + if (s > be16_to_cpu(hdr->group[0].padding)) { + pr_err("Internal error: no space for header\n"); + return -EINVAL; + } + + memcpy(buf, hdr, s); + + print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0); + + return 0; +} + +static int compress(struct nx842_crypto_ctx *ctx, + struct nx842_crypto_param *p, + struct nx842_crypto_header_group *g, + struct nx842_constraints *c, + u16 *ignore, + unsigned int hdrsize) +{ + unsigned int slen = p->iremain, dlen = p->oremain, tmplen; + unsigned int adj_slen = slen; + u8 *src = p->in, *dst = p->out; + int ret, dskip = 0; + ktime_t timeout; + + if (p->iremain == 0) + return -EOVERFLOW; + + if (p->oremain == 0 || hdrsize + c->minimum > dlen) + return -ENOSPC; + + if (slen % c->multiple) + adj_slen = round_up(slen, c->multiple); + if (slen < c->minimum) + adj_slen = c->minimum; + if (slen > c->maximum) + adj_slen = slen = c->maximum; + if (adj_slen > slen || (u64)src % c->alignment) { + adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE); + slen = min(slen, BOUNCE_BUFFER_SIZE); + if (adj_slen > slen) + memset(ctx->sbounce + slen, 0, adj_slen - slen); + memcpy(ctx->sbounce, src, slen); + src = ctx->sbounce; + slen = adj_slen; + pr_debug("using comp sbounce buffer, len %x\n", slen); + } + + dst += hdrsize; + dlen -= hdrsize; + + if ((u64)dst % c->alignment) { + dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst); + dst += dskip; + dlen -= dskip; + } + if (dlen % c->multiple) + dlen = round_down(dlen, c->multiple); + if (dlen < c->minimum) { +nospc: + dst = ctx->dbounce; + dlen = min(p->oremain, BOUNCE_BUFFER_SIZE); + dlen = round_down(dlen, c->multiple); + dskip = 0; + pr_debug("using comp dbounce buffer, len %x\n", dlen); + } + if (dlen > c->maximum) + dlen = c->maximum; + + tmplen = dlen; + timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT); + do { + dlen = tmplen; /* reset dlen, if we're retrying */ + ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem); + /* possibly we should reduce the slen here, instead of + * retrying with the dbounce buffer? + */ + if (ret == -ENOSPC && dst != ctx->dbounce) + goto nospc; + } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); + if (ret) + return ret; + + dskip += hdrsize; + + if (dst == ctx->dbounce) + memcpy(p->out + dskip, dst, dlen); + + g->padding = cpu_to_be16(dskip); + g->compressed_length = cpu_to_be32(dlen); + g->uncompressed_length = cpu_to_be32(slen); + + if (p->iremain < slen) { + *ignore = slen - p->iremain; + slen = p->iremain; + } + + pr_debug("compress slen %x ignore %x dlen %x padding %x\n", + slen, *ignore, dlen, dskip); + + return update_param(p, slen, dskip + dlen); +} + +static int nx842_crypto_compress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + struct nx842_crypto_header *hdr = &ctx->header; + struct nx842_crypto_param p; + struct nx842_constraints c; + unsigned int groups, hdrsize, h; + int ret, n; + bool add_header; + u16 ignore = 0; + + p.in = (u8 *)src; + p.iremain = slen; + p.out = dst; + p.oremain = *dlen; + p.ototal = 0; + + *dlen = 0; + + ret = read_constraints(&c); + if (ret) + return ret; + + groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX, + DIV_ROUND_UP(p.iremain, c.maximum)); + hdrsize = NX842_CRYPTO_HEADER_SIZE(groups); + + /* skip adding header if the buffers meet all constraints */ + add_header = (p.iremain % c.multiple || + p.iremain < c.minimum || + p.iremain > c.maximum || + (u64)p.in % c.alignment || + p.oremain % c.multiple || + p.oremain < c.minimum || + p.oremain > c.maximum || + (u64)p.out % c.alignment); + + hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC); + hdr->groups = 0; + hdr->ignore = 0; + + while (p.iremain > 0) { + n = hdr->groups++; + if (hdr->groups > NX842_CRYPTO_GROUP_MAX) + return -ENOSPC; + + /* header goes before first group */ + h = !n && add_header ? hdrsize : 0; + + if (ignore) + pr_warn("interal error, ignore is set %x\n", ignore); + + ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h); + if (ret) + return ret; + } + + if (!add_header && hdr->groups > 1) { + pr_err("Internal error: No header but multiple groups\n"); + return -EINVAL; + } + + /* ignore indicates the input stream needed to be padded */ + hdr->ignore = cpu_to_be16(ignore); + if (ignore) + pr_debug("marked %d bytes as ignore\n", ignore); + + if (add_header) + ret = nx842_crypto_add_header(hdr, dst); + if (ret) + return ret; + + *dlen = p.ototal; + + pr_debug("compress total slen %x dlen %x\n", slen, *dlen); + + return 0; +} + +static int decompress(struct nx842_crypto_ctx *ctx, + struct nx842_crypto_param *p, + struct nx842_crypto_header_group *g, + struct nx842_constraints *c, + u16 ignore, + bool usehw) +{ + unsigned int slen = be32_to_cpu(g->compressed_length); + unsigned int required_len = be32_to_cpu(g->uncompressed_length); + unsigned int dlen = p->oremain, tmplen; + unsigned int adj_slen = slen; + u8 *src = p->in, *dst = p->out; + u16 padding = be16_to_cpu(g->padding); + int ret, spadding = 0, dpadding = 0; + ktime_t timeout; + + if (!slen || !required_len) + return -EINVAL; + + if (p->iremain <= 0 || padding + slen > p->iremain) + return -EOVERFLOW; + + if (p->oremain <= 0 || required_len - ignore > p->oremain) + return -ENOSPC; + + src += padding; + + if (!usehw) + goto usesw; + + if (slen % c->multiple) + adj_slen = round_up(slen, c->multiple); + if (slen < c->minimum) + adj_slen = c->minimum; + if (slen > c->maximum) + goto usesw; + if (slen < adj_slen || (u64)src % c->alignment) { + /* we can append padding bytes because the 842 format defines + * an "end" template (see lib/842/842_decompress.c) and will + * ignore any bytes following it. + */ + if (slen < adj_slen) + memset(ctx->sbounce + slen, 0, adj_slen - slen); + memcpy(ctx->sbounce, src, slen); + src = ctx->sbounce; + spadding = adj_slen - slen; + slen = adj_slen; + pr_debug("using decomp sbounce buffer, len %x\n", slen); + } + + if (dlen % c->multiple) + dlen = round_down(dlen, c->multiple); + if (dlen < required_len || (u64)dst % c->alignment) { + dst = ctx->dbounce; + dlen = min(required_len, BOUNCE_BUFFER_SIZE); + pr_debug("using decomp dbounce buffer, len %x\n", dlen); + } + if (dlen < c->minimum) + goto usesw; + if (dlen > c->maximum) + dlen = c->maximum; + + tmplen = dlen; + timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT); + do { + dlen = tmplen; /* reset dlen, if we're retrying */ + ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem); + } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); + if (ret) { +usesw: + /* reset everything, sw doesn't have constraints */ + src = p->in + padding; + slen = be32_to_cpu(g->compressed_length); + spadding = 0; + dst = p->out; + dlen = p->oremain; + dpadding = 0; + if (dlen < required_len) { /* have ignore bytes */ + dst = ctx->dbounce; + dlen = BOUNCE_BUFFER_SIZE; + } + pr_info_ratelimited("using software 842 decompression\n"); + ret = sw842_decompress(src, slen, dst, &dlen); + } + if (ret) + return ret; + + slen -= spadding; + + dlen -= ignore; + if (ignore) + pr_debug("ignoring last %x bytes\n", ignore); + + if (dst == ctx->dbounce) + memcpy(p->out, dst, dlen); + + pr_debug("decompress slen %x padding %x dlen %x ignore %x\n", + slen, padding, dlen, ignore); + + return update_param(p, slen + padding, dlen); +} + +static int nx842_crypto_decompress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + struct nx842_crypto_header *hdr; + struct nx842_crypto_param p; + struct nx842_constraints c; + int n, ret, hdr_len; + u16 ignore = 0; + bool usehw = true; + + p.in = (u8 *)src; + p.iremain = slen; + p.out = dst; + p.oremain = *dlen; + p.ototal = 0; + + *dlen = 0; + + if (read_constraints(&c)) + usehw = false; + + hdr = (struct nx842_crypto_header *)src; + + /* If it doesn't start with our header magic number, assume it's a raw + * 842 compressed buffer and pass it directly to the hardware driver + */ + if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) { + struct nx842_crypto_header_group g = { + .padding = 0, + .compressed_length = cpu_to_be32(p.iremain), + .uncompressed_length = cpu_to_be32(p.oremain), + }; + + ret = decompress(ctx, &p, &g, &c, 0, usehw); + if (ret) + return ret; + + *dlen = p.ototal; + + return 0; + } + + if (!hdr->groups) { + pr_err("header has no groups\n"); + return -EINVAL; + } + if (hdr->groups > NX842_CRYPTO_GROUP_MAX) { + pr_err("header has too many groups %x, max %x\n", + hdr->groups, NX842_CRYPTO_GROUP_MAX); + return -EINVAL; + } + + hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups); + if (hdr_len > slen) + return -EOVERFLOW; + + memcpy(&ctx->header, src, hdr_len); + hdr = &ctx->header; + + for (n = 0; n < hdr->groups; n++) { + /* ignore applies to last group */ + if (n + 1 == hdr->groups) + ignore = be16_to_cpu(hdr->ignore); + + ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw); + if (ret) + return ret; + } + + *dlen = p.ototal; + + pr_debug("decompress total slen %x dlen %x\n", slen, *dlen); + + return 0; +} + +static struct crypto_alg alg = { + .cra_name = "842", + .cra_driver_name = "842-nx", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct nx842_crypto_ctx), + .cra_module = THIS_MODULE, + .cra_init = nx842_crypto_init, + .cra_exit = nx842_crypto_exit, + .cra_u = { .compress = { + .coa_compress = nx842_crypto_compress, + .coa_decompress = nx842_crypto_decompress } } +}; + +static int __init nx842_crypto_mod_init(void) +{ + return crypto_register_alg(&alg); +} +module_init(nx842_crypto_mod_init); + +static void __exit nx842_crypto_mod_exit(void) +{ + crypto_unregister_alg(&alg); +} +module_exit(nx842_crypto_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface"); +MODULE_ALIAS_CRYPTO("842"); +MODULE_ALIAS_CRYPTO("842-nx"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/drivers/crypto/nx/nx-842-platform.c b/drivers/crypto/nx/nx-842-platform.c new file mode 100644 index 0000000..664f13d --- /dev/null +++ b/drivers/crypto/nx/nx-842-platform.c @@ -0,0 +1,84 @@ + +#include "nx-842.h" + +/* this is needed, separate from the main nx-842.c driver, because that main + * driver loads the platform drivers during its init(), and it expects one + * (or none) of the platform drivers to set this pointer to its driver. + * That means this pointer can't be in the main nx-842 driver, because it + * wouldn't be accessible until after the main driver loaded, which wouldn't + * be possible as it's waiting for the platform driver to load. So place it + * here. + */ +static struct nx842_driver *driver; +static DEFINE_SPINLOCK(driver_lock); + +struct nx842_driver *nx842_platform_driver(void) +{ + return driver; +} +EXPORT_SYMBOL_GPL(nx842_platform_driver); + +bool nx842_platform_driver_set(struct nx842_driver *_driver) +{ + bool ret = false; + + spin_lock(&driver_lock); + + if (!driver) { + driver = _driver; + ret = true; + } else + WARN(1, "can't set platform driver, already set to %s\n", + driver->name); + + spin_unlock(&driver_lock); + return ret; +} +EXPORT_SYMBOL_GPL(nx842_platform_driver_set); + +/* only call this from the platform driver exit function */ +void nx842_platform_driver_unset(struct nx842_driver *_driver) +{ + spin_lock(&driver_lock); + + if (driver == _driver) + driver = NULL; + else if (driver) + WARN(1, "can't unset platform driver %s, currently set to %s\n", + _driver->name, driver->name); + else + WARN(1, "can't unset platform driver, already unset\n"); + + spin_unlock(&driver_lock); +} +EXPORT_SYMBOL_GPL(nx842_platform_driver_unset); + +bool nx842_platform_driver_get(void) +{ + bool ret = false; + + spin_lock(&driver_lock); + + if (driver) + ret = try_module_get(driver->owner); + + spin_unlock(&driver_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(nx842_platform_driver_get); + +void nx842_platform_driver_put(void) +{ + spin_lock(&driver_lock); + + if (driver) + module_put(driver->owner); + + spin_unlock(&driver_lock); +} +EXPORT_SYMBOL_GPL(nx842_platform_driver_put); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); +MODULE_DESCRIPTION("842 H/W Compression platform driver"); diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c new file mode 100644 index 0000000..33b3b0a --- /dev/null +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -0,0 +1,637 @@ +/* + * Driver for IBM PowerNV 842 compression accelerator + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "nx-842.h" + +#include <linux/timer.h> + +#include <asm/prom.h> +#include <asm/icswx.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors"); + +#define WORKMEM_ALIGN (CRB_ALIGN) +#define CSB_WAIT_MAX (5000) /* ms */ + +struct nx842_workmem { + /* Below fields must be properly aligned */ + struct coprocessor_request_block crb; /* CRB_ALIGN align */ + struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */ + struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */ + /* Above fields must be properly aligned */ + + ktime_t start; + + char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ +} __packed __aligned(WORKMEM_ALIGN); + +struct nx842_coproc { + unsigned int chip_id; + unsigned int ct; + unsigned int ci; + struct list_head list; +}; + +/* no cpu hotplug on powernv, so this list never changes after init */ +static LIST_HEAD(nx842_coprocs); +static unsigned int nx842_ct; + +/** + * setup_indirect_dde - Setup an indirect DDE + * + * The DDE is setup with the the DDE count, byte count, and address of + * first direct DDE in the list. + */ +static void setup_indirect_dde(struct data_descriptor_entry *dde, + struct data_descriptor_entry *ddl, + unsigned int dde_count, unsigned int byte_count) +{ + dde->flags = 0; + dde->count = dde_count; + dde->index = 0; + dde->length = cpu_to_be32(byte_count); + dde->address = cpu_to_be64(nx842_get_pa(ddl)); +} + +/** + * setup_direct_dde - Setup single DDE from buffer + * + * The DDE is setup with the buffer and length. The buffer must be properly + * aligned. The used length is returned. + * Returns: + * N Successfully set up DDE with N bytes + */ +static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, + unsigned long pa, unsigned int len) +{ + unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); + + dde->flags = 0; + dde->count = 0; + dde->index = 0; + dde->length = cpu_to_be32(l); + dde->address = cpu_to_be64(pa); + + return l; +} + +/** + * setup_ddl - Setup DDL from buffer + * + * Returns: + * 0 Successfully set up DDL + */ +static int setup_ddl(struct data_descriptor_entry *dde, + struct data_descriptor_entry *ddl, + unsigned char *buf, unsigned int len, + bool in) +{ + unsigned long pa = nx842_get_pa(buf); + int i, ret, total_len = len; + + if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { + pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", + in ? "input" : "output", pa, DDE_BUFFER_ALIGN); + return -EINVAL; + } + + /* only need to check last mult; since buffer must be + * DDE_BUFFER_ALIGN aligned, and that is a multiple of + * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers + * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. + */ + if (len % DDE_BUFFER_LAST_MULT) { + pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", + in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); + if (in) + return -EINVAL; + len = round_down(len, DDE_BUFFER_LAST_MULT); + } + + /* use a single direct DDE */ + if (len <= LEN_ON_PAGE(pa)) { + ret = setup_direct_dde(dde, pa, len); + WARN_ON(ret < len); + return 0; + } + + /* use the DDL */ + for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { + ret = setup_direct_dde(&ddl[i], pa, len); + buf += ret; + len -= ret; + pa = nx842_get_pa(buf); + } + + if (len > 0) { + pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", + total_len, in ? "input" : "output", len); + if (in) + return -EMSGSIZE; + total_len -= len; + } + setup_indirect_dde(dde, ddl, i, total_len); + + return 0; +} + +#define CSB_ERR(csb, msg, ...) \ + pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ + ##__VA_ARGS__, (csb)->flags, \ + (csb)->cs, (csb)->cc, (csb)->ce, \ + be32_to_cpu((csb)->count)) + +#define CSB_ERR_ADDR(csb, msg, ...) \ + CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ + (unsigned long)be64_to_cpu((csb)->address)) + +/** + * wait_for_csb + */ +static int wait_for_csb(struct nx842_workmem *wmem, + struct coprocessor_status_block *csb) +{ + ktime_t start = wmem->start, now = ktime_get(); + ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); + + while (!(ACCESS_ONCE(csb->flags) & CSB_V)) { + cpu_relax(); + now = ktime_get(); + if (ktime_after(now, timeout)) + break; + } + + /* hw has updated csb and output buffer */ + barrier(); + + /* check CSB flags */ + if (!(csb->flags & CSB_V)) { + CSB_ERR(csb, "CSB still not valid after %ld us, giving up", + (long)ktime_us_delta(now, start)); + return -ETIMEDOUT; + } + if (csb->flags & CSB_F) { + CSB_ERR(csb, "Invalid CSB format"); + return -EPROTO; + } + if (csb->flags & CSB_CH) { + CSB_ERR(csb, "Invalid CSB chaining state"); + return -EPROTO; + } + + /* verify CSB completion sequence is 0 */ + if (csb->cs) { + CSB_ERR(csb, "Invalid CSB completion sequence"); + return -EPROTO; + } + + /* check CSB Completion Code */ + switch (csb->cc) { + /* no error */ + case CSB_CC_SUCCESS: + break; + case CSB_CC_TPBC_GT_SPBC: + /* not an error, but the compressed data is + * larger than the uncompressed data :( + */ + break; + + /* input data errors */ + case CSB_CC_OPERAND_OVERLAP: + /* input and output buffers overlap */ + CSB_ERR(csb, "Operand Overlap error"); + return -EINVAL; + case CSB_CC_INVALID_OPERAND: + CSB_ERR(csb, "Invalid operand"); + return -EINVAL; + case CSB_CC_NOSPC: + /* output buffer too small */ + return -ENOSPC; + case CSB_CC_ABORT: + CSB_ERR(csb, "Function aborted"); + return -EINTR; + case CSB_CC_CRC_MISMATCH: + CSB_ERR(csb, "CRC mismatch"); + return -EINVAL; + case CSB_CC_TEMPL_INVALID: + CSB_ERR(csb, "Compressed data template invalid"); + return -EINVAL; + case CSB_CC_TEMPL_OVERFLOW: + CSB_ERR(csb, "Compressed data template shows data past end"); + return -EINVAL; + + /* these should not happen */ + case CSB_CC_INVALID_ALIGN: + /* setup_ddl should have detected this */ + CSB_ERR_ADDR(csb, "Invalid alignment"); + return -EINVAL; + case CSB_CC_DATA_LENGTH: + /* setup_ddl should have detected this */ + CSB_ERR(csb, "Invalid data length"); + return -EINVAL; + case CSB_CC_WR_TRANSLATION: + case CSB_CC_TRANSLATION: + case CSB_CC_TRANSLATION_DUP1: + case CSB_CC_TRANSLATION_DUP2: + case CSB_CC_TRANSLATION_DUP3: + case CSB_CC_TRANSLATION_DUP4: + case CSB_CC_TRANSLATION_DUP5: + case CSB_CC_TRANSLATION_DUP6: + /* should not happen, we use physical addrs */ + CSB_ERR_ADDR(csb, "Translation error"); + return -EPROTO; + case CSB_CC_WR_PROTECTION: + case CSB_CC_PROTECTION: + case CSB_CC_PROTECTION_DUP1: + case CSB_CC_PROTECTION_DUP2: + case CSB_CC_PROTECTION_DUP3: + case CSB_CC_PROTECTION_DUP4: + case CSB_CC_PROTECTION_DUP5: + case CSB_CC_PROTECTION_DUP6: + /* should not happen, we use physical addrs */ + CSB_ERR_ADDR(csb, "Protection error"); + return -EPROTO; + case CSB_CC_PRIVILEGE: + /* shouldn't happen, we're in HYP mode */ + CSB_ERR(csb, "Insufficient Privilege error"); + return -EPROTO; + case CSB_CC_EXCESSIVE_DDE: + /* shouldn't happen, setup_ddl doesn't use many dde's */ + CSB_ERR(csb, "Too many DDEs in DDL"); + return -EINVAL; + case CSB_CC_TRANSPORT: + /* shouldn't happen, we setup CRB correctly */ + CSB_ERR(csb, "Invalid CRB"); + return -EINVAL; + case CSB_CC_SEGMENTED_DDL: + /* shouldn't happen, setup_ddl creates DDL right */ + CSB_ERR(csb, "Segmented DDL error"); + return -EINVAL; + case CSB_CC_DDE_OVERFLOW: + /* shouldn't happen, setup_ddl creates DDL right */ + CSB_ERR(csb, "DDE overflow error"); + return -EINVAL; + case CSB_CC_SESSION: + /* should not happen with ICSWX */ + CSB_ERR(csb, "Session violation error"); + return -EPROTO; + case CSB_CC_CHAIN: + /* should not happen, we don't use chained CRBs */ + CSB_ERR(csb, "Chained CRB error"); + return -EPROTO; + case CSB_CC_SEQUENCE: + /* should not happen, we don't use chained CRBs */ + CSB_ERR(csb, "CRB seqeunce number error"); + return -EPROTO; + case CSB_CC_UNKNOWN_CODE: + CSB_ERR(csb, "Unknown subfunction code"); + return -EPROTO; + + /* hardware errors */ + case CSB_CC_RD_EXTERNAL: + case CSB_CC_RD_EXTERNAL_DUP1: + case CSB_CC_RD_EXTERNAL_DUP2: + case CSB_CC_RD_EXTERNAL_DUP3: + CSB_ERR_ADDR(csb, "Read error outside coprocessor"); + return -EPROTO; + case CSB_CC_WR_EXTERNAL: + CSB_ERR_ADDR(csb, "Write error outside coprocessor"); + return -EPROTO; + case CSB_CC_INTERNAL: + CSB_ERR(csb, "Internal error in coprocessor"); + return -EPROTO; + case CSB_CC_PROVISION: + CSB_ERR(csb, "Storage provision error"); + return -EPROTO; + case CSB_CC_HW: + CSB_ERR(csb, "Correctable hardware error"); + return -EPROTO; + + default: + CSB_ERR(csb, "Invalid CC %d", csb->cc); + return -EPROTO; + } + + /* check Completion Extension state */ + if (csb->ce & CSB_CE_TERMINATION) { + CSB_ERR(csb, "CSB request was terminated"); + return -EPROTO; + } + if (csb->ce & CSB_CE_INCOMPLETE) { + CSB_ERR(csb, "CSB request not complete"); + return -EPROTO; + } + if (!(csb->ce & CSB_CE_TPBC)) { + CSB_ERR(csb, "TPBC not provided, unknown target length"); + return -EPROTO; + } + + /* successful completion */ + pr_debug_ratelimited("Processed %u bytes in %lu us\n", csb->count, + (unsigned long)ktime_us_delta(now, start)); + + return 0; +} + +/** + * nx842_powernv_function - compress/decompress data using the 842 algorithm + * + * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. + * This compresses or decompresses the provided input buffer into the provided + * output buffer. + * + * Upon return from this function @outlen contains the length of the + * output data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * The @workmem buffer should only be used by one function call at a time. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, size determined by + * nx842_powernv_driver.workmem_size + * @fc: function code, see CCW Function Codes in nx-842.h + * + * Returns: + * 0 Success, output of length @outlenp stored in the buffer at @out + * -ENODEV Hardware unavailable + * -ENOSPC Output buffer is to small + * -EMSGSIZE Input buffer too large + * -EINVAL buffer constraints do not fix nx842_constraints + * -EPROTO hardware error during operation + * -ETIMEDOUT hardware did not complete operation in reasonable time + * -EINTR operation was aborted + */ +static int nx842_powernv_function(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *workmem, int fc) +{ + struct coprocessor_request_block *crb; + struct coprocessor_status_block *csb; + struct nx842_workmem *wmem; + int ret; + u64 csb_addr; + u32 ccw; + unsigned int outlen = *outlenp; + + wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); + + *outlenp = 0; + + /* shoudn't happen, we don't load without a coproc */ + if (!nx842_ct) { + pr_err_ratelimited("coprocessor CT is 0"); + return -ENODEV; + } + + crb = &wmem->crb; + csb = &crb->csb; + + /* Clear any previous values */ + memset(crb, 0, sizeof(*crb)); + + /* set up DDLs */ + ret = setup_ddl(&crb->source, wmem->ddl_in, + (unsigned char *)in, inlen, true); + if (ret) + return ret; + ret = setup_ddl(&crb->target, wmem->ddl_out, + out, outlen, false); + if (ret) + return ret; + + /* set up CCW */ + ccw = 0; + ccw = SET_FIELD(ccw, CCW_CT, nx842_ct); + ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */ + ccw = SET_FIELD(ccw, CCW_FC_842, fc); + + /* set up CRB's CSB addr */ + csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; + csb_addr |= CRB_CSB_AT; /* Addrs are phys */ + crb->csb_addr = cpu_to_be64(csb_addr); + + wmem->start = ktime_get(); + + /* do ICSWX */ + ret = icswx(cpu_to_be32(ccw), crb); + + pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, + (unsigned int)ccw, + (unsigned int)be32_to_cpu(crb->ccw)); + + switch (ret) { + case ICSWX_INITIATED: + ret = wait_for_csb(wmem, csb); + break; + case ICSWX_BUSY: + pr_debug_ratelimited("842 Coprocessor busy\n"); + ret = -EBUSY; + break; + case ICSWX_REJECTED: + pr_err_ratelimited("ICSWX rejected\n"); + ret = -EPROTO; + break; + default: + pr_err_ratelimited("Invalid ICSWX return code %x\n", ret); + ret = -EPROTO; + break; + } + + if (!ret) + *outlenp = be32_to_cpu(csb->count); + + return ret; +} + +/** + * nx842_powernv_compress - Compress data using the 842 algorithm + * + * Compression provided by the NX842 coprocessor on IBM PowerNV systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, size determined by + * nx842_powernv_driver.workmem_size + * + * Returns: see @nx842_powernv_function() + */ +static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *wmem) +{ + return nx842_powernv_function(in, inlen, out, outlenp, + wmem, CCW_FC_842_COMP_NOCRC); +} + +/** + * nx842_powernv_decompress - Decompress data using the 842 algorithm + * + * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. + * The input buffer is decompressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * decompressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, size determined by + * nx842_powernv_driver.workmem_size + * + * Returns: see @nx842_powernv_function() + */ +static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *wmem) +{ + return nx842_powernv_function(in, inlen, out, outlenp, + wmem, CCW_FC_842_DECOMP_NOCRC); +} + +static int __init nx842_powernv_probe(struct device_node *dn) +{ + struct nx842_coproc *coproc; + struct property *ct_prop, *ci_prop; + unsigned int ct, ci; + int chip_id; + + chip_id = of_get_ibm_chip_id(dn); + if (chip_id < 0) { + pr_err("ibm,chip-id missing\n"); + return -EINVAL; + } + ct_prop = of_find_property(dn, "ibm,842-coprocessor-type", NULL); + if (!ct_prop) { + pr_err("ibm,842-coprocessor-type missing\n"); + return -EINVAL; + } + ct = be32_to_cpu(*(unsigned int *)ct_prop->value); + ci_prop = of_find_property(dn, "ibm,842-coprocessor-instance", NULL); + if (!ci_prop) { + pr_err("ibm,842-coprocessor-instance missing\n"); + return -EINVAL; + } + ci = be32_to_cpu(*(unsigned int *)ci_prop->value); + + coproc = kmalloc(sizeof(*coproc), GFP_KERNEL); + if (!coproc) + return -ENOMEM; + + coproc->chip_id = chip_id; + coproc->ct = ct; + coproc->ci = ci; + INIT_LIST_HEAD(&coproc->list); + list_add(&coproc->list, &nx842_coprocs); + + pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); + + if (!nx842_ct) + nx842_ct = ct; + else if (nx842_ct != ct) + pr_err("NX842 chip %d, CT %d != first found CT %d\n", + chip_id, ct, nx842_ct); + + return 0; +} + +static struct nx842_constraints nx842_powernv_constraints = { + .alignment = DDE_BUFFER_ALIGN, + .multiple = DDE_BUFFER_LAST_MULT, + .minimum = DDE_BUFFER_LAST_MULT, + .maximum = (DDL_LEN_MAX - 1) * PAGE_SIZE, +}; + +static struct nx842_driver nx842_powernv_driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .workmem_size = sizeof(struct nx842_workmem), + .constraints = &nx842_powernv_constraints, + .compress = nx842_powernv_compress, + .decompress = nx842_powernv_decompress, +}; + +static __init int nx842_powernv_init(void) +{ + struct device_node *dn; + + /* verify workmem size/align restrictions */ + BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); + BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); + BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); + /* verify buffer size/align restrictions */ + BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); + BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); + BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); + + pr_info("loading\n"); + + for_each_compatible_node(dn, NULL, "ibm,power-nx") + nx842_powernv_probe(dn); + + if (!nx842_ct) { + pr_err("no coprocessors found\n"); + return -ENODEV; + } + + if (!nx842_platform_driver_set(&nx842_powernv_driver)) { + struct nx842_coproc *coproc, *n; + + list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { + list_del(&coproc->list); + kfree(coproc); + } + + return -EEXIST; + } + + pr_info("loaded\n"); + + return 0; +} +module_init(nx842_powernv_init); + +static void __exit nx842_powernv_exit(void) +{ + struct nx842_coproc *coproc, *n; + + nx842_platform_driver_unset(&nx842_powernv_driver); + + list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { + list_del(&coproc->list); + kfree(coproc); + } + + pr_info("unloaded\n"); +} +module_exit(nx842_powernv_exit); diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c new file mode 100644 index 0000000..3040a60 --- /dev/null +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -0,0 +1,1140 @@ +/* + * Driver for IBM Power 842 compression accelerator + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> + */ + +#include <asm/vio.h> + +#include "nx-842.h" +#include "nx_csbcpb.h" /* struct nx_csbcpb */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); + +static struct nx842_constraints nx842_pseries_constraints = { + .alignment = DDE_BUFFER_ALIGN, + .multiple = DDE_BUFFER_LAST_MULT, + .minimum = DDE_BUFFER_LAST_MULT, + .maximum = PAGE_SIZE, /* dynamic, max_sync_size */ +}; + +static int check_constraints(unsigned long buf, unsigned int *len, bool in) +{ + if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) { + pr_debug("%s buffer 0x%lx not aligned to 0x%x\n", + in ? "input" : "output", buf, + nx842_pseries_constraints.alignment); + return -EINVAL; + } + if (*len % nx842_pseries_constraints.multiple) { + pr_debug("%s buffer len 0x%x not multiple of 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.multiple); + if (in) + return -EINVAL; + *len = round_down(*len, nx842_pseries_constraints.multiple); + } + if (*len < nx842_pseries_constraints.minimum) { + pr_debug("%s buffer len 0x%x under minimum 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.minimum); + return -EINVAL; + } + if (*len > nx842_pseries_constraints.maximum) { + pr_debug("%s buffer len 0x%x over maximum 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.maximum); + if (in) + return -EINVAL; + *len = nx842_pseries_constraints.maximum; + } + return 0; +} + +/* I assume we need to align the CSB? */ +#define WORKMEM_ALIGN (256) + +struct nx842_workmem { + /* scatterlist */ + char slin[4096]; + char slout[4096]; + /* coprocessor status/parameter block */ + struct nx_csbcpb csbcpb; + + char padding[WORKMEM_ALIGN]; +} __aligned(WORKMEM_ALIGN); + +/* Macros for fields within nx_csbcpb */ +/* Check the valid bit within the csbcpb valid field */ +#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) + +/* CE macros operate on the completion_extension field bits in the csbcpb. + * CE0 0=full completion, 1=partial completion + * CE1 0=CE0 indicates completion, 1=termination (output may be modified) + * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ +#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) +#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) +#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) + +/* The NX unit accepts data only on 4K page boundaries */ +#define NX842_HW_PAGE_SIZE (4096) +#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) + +enum nx842_status { + UNAVAILABLE, + AVAILABLE +}; + +struct ibm_nx842_counters { + atomic64_t comp_complete; + atomic64_t comp_failed; + atomic64_t decomp_complete; + atomic64_t decomp_failed; + atomic64_t swdecomp; + atomic64_t comp_times[32]; + atomic64_t decomp_times[32]; +}; + +static struct nx842_devdata { + struct vio_dev *vdev; + struct device *dev; + struct ibm_nx842_counters *counters; + unsigned int max_sg_len; + unsigned int max_sync_size; + unsigned int max_sync_sg; + enum nx842_status status; +} __rcu *devdata; +static DEFINE_SPINLOCK(devdata_mutex); + +#define NX842_COUNTER_INC(_x) \ +static inline void nx842_inc_##_x( \ + const struct nx842_devdata *dev) { \ + if (dev) \ + atomic64_inc(&dev->counters->_x); \ +} +NX842_COUNTER_INC(comp_complete); +NX842_COUNTER_INC(comp_failed); +NX842_COUNTER_INC(decomp_complete); +NX842_COUNTER_INC(decomp_failed); +NX842_COUNTER_INC(swdecomp); + +#define NX842_HIST_SLOTS 16 + +static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) +{ + int bucket = fls(time); + + if (bucket) + bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); + + atomic64_inc(×[bucket]); +} + +/* NX unit operation flags */ +#define NX842_OP_COMPRESS 0x0 +#define NX842_OP_CRC 0x1 +#define NX842_OP_DECOMPRESS 0x2 +#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) +#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) +#define NX842_OP_ASYNC (1<<23) +#define NX842_OP_NOTIFY (1<<22) +#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) + +static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) +{ + /* No use of DMA mappings within the driver. */ + return 0; +} + +struct nx842_slentry { + __be64 ptr; /* Real address (use __pa()) */ + __be64 len; +}; + +/* pHyp scatterlist entry */ +struct nx842_scatterlist { + int entry_nr; /* number of slentries */ + struct nx842_slentry *entries; /* ptr to array of slentries */ +}; + +/* Does not include sizeof(entry_nr) in the size */ +static inline unsigned long nx842_get_scatterlist_size( + struct nx842_scatterlist *sl) +{ + return sl->entry_nr * sizeof(struct nx842_slentry); +} + +static int nx842_build_scatterlist(unsigned long buf, int len, + struct nx842_scatterlist *sl) +{ + unsigned long entrylen; + struct nx842_slentry *entry; + + sl->entry_nr = 0; + + entry = sl->entries; + while (len) { + entry->ptr = cpu_to_be64(nx842_get_pa((void *)buf)); + entrylen = min_t(int, len, + LEN_ON_SIZE(buf, NX842_HW_PAGE_SIZE)); + entry->len = cpu_to_be64(entrylen); + + len -= entrylen; + buf += entrylen; + + sl->entry_nr++; + entry++; + } + + return 0; +} + +static int nx842_validate_result(struct device *dev, + struct cop_status_block *csb) +{ + /* The csb must be valid after returning from vio_h_cop_sync */ + if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { + dev_err(dev, "%s: cspcbp not valid upon completion.\n", + __func__); + dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", + csb->valid, + csb->crb_seq_number, + csb->completion_code, + csb->completion_extension); + dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", + be32_to_cpu(csb->processed_byte_count), + (unsigned long)be64_to_cpu(csb->address)); + return -EIO; + } + + /* Check return values from the hardware in the CSB */ + switch (csb->completion_code) { + case 0: /* Completed without error */ + break; + case 64: /* Target bytes > Source bytes during compression */ + case 13: /* Output buffer too small */ + dev_dbg(dev, "%s: Compression output larger than input\n", + __func__); + return -ENOSPC; + case 66: /* Input data contains an illegal template field */ + case 67: /* Template indicates data past the end of the input stream */ + dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", + __func__, csb->completion_code); + return -EINVAL; + default: + dev_dbg(dev, "%s: Unspecified error (code:%d)\n", + __func__, csb->completion_code); + return -EIO; + } + + /* Hardware sanity check */ + if (!NX842_CSBCPB_CE2(csb->completion_extension)) { + dev_err(dev, "%s: No error returned by hardware, but " + "data returned is unusable, contact support.\n" + "(Additional info: csbcbp->processed bytes " + "does not specify processed bytes for the " + "target buffer.)\n", __func__); + return -EIO; + } + + return 0; +} + +/** + * nx842_pseries_compress - Compress data using the 842 algorithm + * + * Compression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EIO Internal error + * -ENODEV Hardware unavailable + */ +static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, max_sync_size; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + max_sync_size = local_devdata->max_sync_size; + dev = local_devdata->dev; + + /* Init scatterlist */ + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_COMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", + __func__, (unsigned long)op.in, (long)op.inlen, + (unsigned long)op.out, (long)op.outlen); + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + ret = -EIO; + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); + dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen); + +unlock: + if (ret) + nx842_inc_comp_failed(local_devdata); + else { + nx842_inc_comp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->comp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + rcu_read_unlock(); + return ret; +} + +/** + * nx842_pseries_decompress - Decompress data using the 842 algorithm + * + * Decompression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is decompressed and the result is stored in the + * provided output buffer. The size allocated to the output buffer is + * provided by the caller of this function in @outlen. Upon return from + * this function @outlen contains the length of the decompressed data. + * If there is an error then @outlen will be 0 and an error will be + * specified by the return code from this function. + * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * nx842_pseries_driver.workmem_size + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENODEV Hardware decompression device is unavailable + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EINVAL Bad input data encountered when attempting decompress + * -EIO Internal error + */ +static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, max_sync_size; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + /* Ensure page alignment and size */ + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + max_sync_size = local_devdata->max_sync_size; + dev = local_devdata->dev; + + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + + /* Init scatterlist */ + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_DECOMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + dev_dbg(dev, "%s: op.in %lx op.inlen %ld op.out %lx op.outlen %ld\n", + __func__, (unsigned long)op.in, (long)op.inlen, + (unsigned long)op.out, (long)op.outlen); + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = be32_to_cpu(csbcpb->csb.processed_byte_count); + +unlock: + if (ret) + /* decompress fail */ + nx842_inc_decomp_failed(local_devdata); + else { + nx842_inc_decomp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->decomp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + + rcu_read_unlock(); + return ret; +} + +/** + * nx842_OF_set_defaults -- Set default (disabled) values for devdata + * + * @devdata - struct nx842_devdata to update + * + * Returns: + * 0 on success + * -ENOENT if @devdata ptr is NULL + */ +static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +{ + if (devdata) { + devdata->max_sync_size = 0; + devdata->max_sync_sg = 0; + devdata->max_sg_len = 0; + devdata->status = UNAVAILABLE; + return 0; + } else + return -ENOENT; +} + +/** + * nx842_OF_upd_status -- Update the device info from OF status prop + * + * The status property indicates if the accelerator is enabled. If the + * device is in the OF tree it indicates that the hardware is present. + * The status field indicates if the device is enabled when the status + * is 'okay'. Otherwise the device driver will be disabled. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 - Device is available + * -EINVAL - Device is not available + */ +static int nx842_OF_upd_status(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const char *status = (const char *)prop->value; + + if (!strncmp(status, "okay", (size_t)prop->length)) { + devdata->status = AVAILABLE; + } else { + dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", + __func__, status); + devdata->status = UNAVAILABLE; + } + + return ret; +} + +/** + * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop + * + * Definition of the 'ibm,max-sg-len' OF property: + * This field indicates the maximum byte length of a scatter list + * for the platform facility. It is a single cell encoded as with encode-int. + * + * Example: + * # od -x ibm,max-sg-len + * 0000000 0000 0ff0 + * + * In this example, the maximum byte length of a scatter list is + * 0x0ff0 (4,080). + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const unsigned int maxsglen = of_read_number(prop->value, 1); + + if (prop->length != sizeof(maxsglen)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, + prop->length, sizeof(maxsglen)); + ret = -EINVAL; + } else { + devdata->max_sg_len = min_t(unsigned int, + maxsglen, NX842_HW_PAGE_SIZE); + } + + return ret; +} + +/** + * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop + * + * Definition of the 'ibm,max-sync-cop' OF property: + * Two series of cells. The first series of cells represents the maximums + * that can be synchronously compressed. The second series of cells + * represents the maximums that can be synchronously decompressed. + * 1. The first cell in each series contains the count of the number of + * data length, scatter list elements pairs that follow – each being + * of the form + * a. One cell data byte length + * b. One cell total number of scatter list elements + * + * Example: + * # od -x ibm,max-sync-cop + * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 + * 0000020 0000 1000 0000 01fe + * + * In this example, compression supports 0x1000 (4,096) data byte length + * and 0x1fe (510) total scatter list elements. Decompression supports + * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list + * elements. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + unsigned int comp_data_limit, decomp_data_limit; + unsigned int comp_sg_limit, decomp_sg_limit; + const struct maxsynccop_t { + __be32 comp_elements; + __be32 comp_data_limit; + __be32 comp_sg_limit; + __be32 decomp_elements; + __be32 decomp_data_limit; + __be32 decomp_sg_limit; + } *maxsynccop; + + if (prop->length != sizeof(*maxsynccop)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, + sizeof(*maxsynccop)); + ret = -EINVAL; + goto out; + } + + maxsynccop = (const struct maxsynccop_t *)prop->value; + comp_data_limit = be32_to_cpu(maxsynccop->comp_data_limit); + comp_sg_limit = be32_to_cpu(maxsynccop->comp_sg_limit); + decomp_data_limit = be32_to_cpu(maxsynccop->decomp_data_limit); + decomp_sg_limit = be32_to_cpu(maxsynccop->decomp_sg_limit); + + /* Use one limit rather than separate limits for compression and + * decompression. Set a maximum for this so as not to exceed the + * size that the header can support and round the value down to + * the hardware page size (4K) */ + devdata->max_sync_size = min(comp_data_limit, decomp_data_limit); + + devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, + 65536); + + if (devdata->max_sync_size < 4096) { + dev_err(devdata->dev, "%s: hardware max data size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_size); + ret = -EINVAL; + goto out; + } + + nx842_pseries_constraints.maximum = devdata->max_sync_size; + + devdata->max_sync_sg = min(comp_sg_limit, decomp_sg_limit); + if (devdata->max_sync_sg < 1) { + dev_err(devdata->dev, "%s: hardware max sg size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_sg); + ret = -EINVAL; + goto out; + } + +out: + return ret; +} + +/** + * + * nx842_OF_upd -- Handle OF properties updates for the device. + * + * Set all properties from the OF tree. Optionally, a new property + * can be provided by the @new_prop pointer to overwrite an existing value. + * The device will remain disabled until all values are valid, this function + * will return an error for updates unless all values are valid. + * + * @new_prop: If not NULL, this property is being updated. If NULL, update + * all properties from the current values in the OF tree. + * + * Returns: + * 0 - Success + * -ENOMEM - Could not allocate memory for new devdata structure + * -EINVAL - property value not found, new_prop is not a recognized + * property for the device or property value is not valid. + * -ENODEV - Device is not available + */ +static int nx842_OF_upd(struct property *new_prop) +{ + struct nx842_devdata *old_devdata = NULL; + struct nx842_devdata *new_devdata = NULL; + struct device_node *of_node = NULL; + struct property *status = NULL; + struct property *maxsglen = NULL; + struct property *maxsyncop = NULL; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + if (old_devdata) + of_node = old_devdata->dev->of_node; + + if (!old_devdata || !of_node) { + pr_err("%s: device is not available\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + return -ENODEV; + } + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_out; + } + + memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); + new_devdata->counters = old_devdata->counters; + + /* Set ptrs for existing properties */ + status = of_find_property(of_node, "status", NULL); + maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); + maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); + if (!status || !maxsglen || !maxsyncop) { + dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); + ret = -EINVAL; + goto error_out; + } + + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; + + /* Perform property updates */ + ret = nx842_OF_upd_status(new_devdata, status); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); + if (ret) + goto error_out; + +out: + dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", + __func__, new_devdata->max_sync_size, + old_devdata->max_sync_size); + dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", + __func__, new_devdata->max_sync_sg, + old_devdata->max_sync_sg); + dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", + __func__, new_devdata->max_sg_len, + old_devdata->max_sg_len); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + return 0; + +error_out: + if (new_devdata) { + dev_info(old_devdata->dev, "%s: device disabled\n", __func__); + nx842_OF_set_defaults(new_devdata); + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + } else { + dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + } + + if (!ret) + ret = -EINVAL; + return ret; +} + +/** + * nx842_OF_notifier - Process updates to OF properties for the device + * + * @np: notifier block + * @action: notifier action + * @update: struct pSeries_reconfig_prop_update pointer if action is + * PSERIES_UPDATE_PROPERTY + * + * Returns: + * NOTIFY_OK on success + * NOTIFY_BAD encoded with error number on failure, use + * notifier_to_errno() to decode this value + */ +static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, + void *data) +{ + struct of_reconfig_data *upd = data; + struct nx842_devdata *local_devdata; + struct device_node *node = NULL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (local_devdata) + node = local_devdata->dev->of_node; + + if (local_devdata && + action == OF_RECONFIG_UPDATE_PROPERTY && + !strcmp(upd->dn->name, node->name)) { + rcu_read_unlock(); + nx842_OF_upd(upd->prop); + } else + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct notifier_block nx842_of_nb = { + .notifier_call = nx842_OF_notifier, +}; + +#define nx842_counter_read(_name) \ +static ssize_t nx842_##_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) { \ + struct nx842_devdata *local_devdata; \ + int p = 0; \ + rcu_read_lock(); \ + local_devdata = rcu_dereference(devdata); \ + if (local_devdata) \ + p = snprintf(buf, PAGE_SIZE, "%ld\n", \ + atomic64_read(&local_devdata->counters->_name)); \ + rcu_read_unlock(); \ + return p; \ +} + +#define NX842DEV_COUNTER_ATTR_RO(_name) \ + nx842_counter_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nx842_##_name##_show,\ + NULL); + +NX842DEV_COUNTER_ATTR_RO(comp_complete); +NX842DEV_COUNTER_ATTR_RO(comp_failed); +NX842DEV_COUNTER_ATTR_RO(decomp_complete); +NX842DEV_COUNTER_ATTR_RO(decomp_failed); +NX842DEV_COUNTER_ATTR_RO(swdecomp); + +static ssize_t nx842_timehist_show(struct device *, + struct device_attribute *, char *); + +static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, + nx842_timehist_show, NULL); +static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, + 0444, nx842_timehist_show, NULL); + +static ssize_t nx842_timehist_show(struct device *dev, + struct device_attribute *attr, char *buf) { + char *p = buf; + struct nx842_devdata *local_devdata; + atomic64_t *times; + int bytes_remain = PAGE_SIZE; + int bytes; + int i; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata) { + rcu_read_unlock(); + return 0; + } + + if (attr == &dev_attr_comp_times) + times = local_devdata->counters->comp_times; + else if (attr == &dev_attr_decomp_times) + times = local_devdata->counters->decomp_times; + else { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { + bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", + i ? (2<<(i-1)) : 0, (2<<i)-1, + atomic64_read(×[i])); + bytes_remain -= bytes; + p += bytes; + } + /* The last bucket holds everything over + * 2<<(NX842_HIST_SLOTS - 2) us */ + bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", + 2<<(NX842_HIST_SLOTS - 2), + atomic64_read(×[(NX842_HIST_SLOTS - 1)])); + p += bytes; + + rcu_read_unlock(); + return p - buf; +} + +static struct attribute *nx842_sysfs_entries[] = { + &dev_attr_comp_complete.attr, + &dev_attr_comp_failed.attr, + &dev_attr_decomp_complete.attr, + &dev_attr_decomp_failed.attr, + &dev_attr_swdecomp.attr, + &dev_attr_comp_times.attr, + &dev_attr_decomp_times.attr, + NULL, +}; + +static struct attribute_group nx842_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = nx842_sysfs_entries, +}; + +static struct nx842_driver nx842_pseries_driver = { + .name = KBUILD_MODNAME, + .owner = THIS_MODULE, + .workmem_size = sizeof(struct nx842_workmem), + .constraints = &nx842_pseries_constraints, + .compress = nx842_pseries_compress, + .decompress = nx842_pseries_decompress, +}; + +static int __init nx842_probe(struct vio_dev *viodev, + const struct vio_device_id *id) +{ + struct nx842_devdata *old_devdata, *new_devdata = NULL; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + + if (old_devdata && old_devdata->vdev != NULL) { + dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); + ret = -1; + goto error_unlock; + } + + dev_set_drvdata(&viodev->dev, NULL); + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), + GFP_NOFS); + if (!new_devdata->counters) { + dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->vdev = viodev; + new_devdata->dev = &viodev->dev; + nx842_OF_set_defaults(new_devdata); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + kfree(old_devdata); + + of_reconfig_notifier_register(&nx842_of_nb); + + ret = nx842_OF_upd(NULL); + if (ret && ret != -ENODEV) { + dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); + ret = -1; + goto error; + } + + rcu_read_lock(); + dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); + rcu_read_unlock(); + + if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { + dev_err(&viodev->dev, "could not create sysfs device attributes\n"); + ret = -1; + goto error; + } + + return 0; + +error_unlock: + spin_unlock_irqrestore(&devdata_mutex, flags); + if (new_devdata) + kfree(new_devdata->counters); + kfree(new_devdata); +error: + return ret; +} + +static int __exit nx842_remove(struct vio_dev *viodev) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Removing IBM Power 842 compression device\n"); + sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + of_reconfig_notifier_unregister(&nx842_of_nb); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(&viodev->dev, NULL); + if (old_devdata) + kfree(old_devdata->counters); + kfree(old_devdata); + + return 0; +} + +static struct vio_device_id nx842_vio_driver_ids[] = { + {"ibm,compression-v1", "ibm,compression"}, + {"", ""}, +}; + +static struct vio_driver nx842_vio_driver = { + .name = KBUILD_MODNAME, + .probe = nx842_probe, + .remove = __exit_p(nx842_remove), + .get_desired_dma = nx842_get_desired_dma, + .id_table = nx842_vio_driver_ids, +}; + +static int __init nx842_init(void) +{ + struct nx842_devdata *new_devdata; + int ret; + + pr_info("Registering IBM Power 842 compression driver\n"); + + if (!of_find_compatible_node(NULL, NULL, "ibm,compression")) + return -ENODEV; + + RCU_INIT_POINTER(devdata, NULL); + new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); + if (!new_devdata) { + pr_err("Could not allocate memory for device data\n"); + return -ENOMEM; + } + new_devdata->status = UNAVAILABLE; + RCU_INIT_POINTER(devdata, new_devdata); + + ret = vio_register_driver(&nx842_vio_driver); + if (ret) { + pr_err("Could not register VIO driver %d\n", ret); + + kfree(new_devdata); + return ret; + } + + if (!nx842_platform_driver_set(&nx842_pseries_driver)) { + vio_unregister_driver(&nx842_vio_driver); + kfree(new_devdata); + return -EEXIST; + } + + return 0; +} + +module_init(nx842_init); + +static void __exit nx842_exit(void) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Exiting IBM Power 842 compression driver\n"); + nx842_platform_driver_unset(&nx842_pseries_driver); + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + if (old_devdata && old_devdata->dev) + dev_set_drvdata(old_devdata->dev, NULL); + kfree(old_devdata); + vio_unregister_driver(&nx842_vio_driver); +} + +module_exit(nx842_exit); + diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 887196e..6e5e0d6 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -1,5 +1,10 @@ /* - * Driver for IBM Power 842 compression accelerator + * Driver frontend for IBM Power 842 compression accelerator + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * Designer of the Power data compression engine: + * Bulent Abali <abali@us.ibm.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -10,1594 +15,89 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) IBM Corporation, 2012 - * - * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> - * Seth Jennings <sjenning@linux.vnet.ibm.com> */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/nx842.h> -#include <linux/of.h> -#include <linux/slab.h> - -#include <asm/page.h> -#include <asm/vio.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include "nx_csbcpb.h" /* struct nx_csbcpb */ +#include "nx-842.h" -#define MODULE_NAME "nx-compress" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); -#define SHIFT_4K 12 -#define SHIFT_64K 16 -#define SIZE_4K (1UL << SHIFT_4K) -#define SIZE_64K (1UL << SHIFT_64K) - -/* IO buffer must be 128 byte aligned */ -#define IO_BUFFER_ALIGN 128 - -struct nx842_header { - int blocks_nr; /* number of compressed blocks */ - int offset; /* offset of the first block (from beginning of header) */ - int sizes[0]; /* size of compressed blocks */ -}; - -static inline int nx842_header_size(const struct nx842_header *hdr) -{ - return sizeof(struct nx842_header) + - hdr->blocks_nr * sizeof(hdr->sizes[0]); -} - -/* Macros for fields within nx_csbcpb */ -/* Check the valid bit within the csbcpb valid field */ -#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) - -/* CE macros operate on the completion_extension field bits in the csbcpb. - * CE0 0=full completion, 1=partial completion - * CE1 0=CE0 indicates completion, 1=termination (output may be modified) - * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ -#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) -#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) -#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) - -/* The NX unit accepts data only on 4K page boundaries */ -#define NX842_HW_PAGE_SHIFT SHIFT_4K -#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) -#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) - -enum nx842_status { - UNAVAILABLE, - AVAILABLE -}; - -struct ibm_nx842_counters { - atomic64_t comp_complete; - atomic64_t comp_failed; - atomic64_t decomp_complete; - atomic64_t decomp_failed; - atomic64_t swdecomp; - atomic64_t comp_times[32]; - atomic64_t decomp_times[32]; -}; - -static struct nx842_devdata { - struct vio_dev *vdev; - struct device *dev; - struct ibm_nx842_counters *counters; - unsigned int max_sg_len; - unsigned int max_sync_size; - unsigned int max_sync_sg; - enum nx842_status status; -} __rcu *devdata; -static DEFINE_SPINLOCK(devdata_mutex); - -#define NX842_COUNTER_INC(_x) \ -static inline void nx842_inc_##_x( \ - const struct nx842_devdata *dev) { \ - if (dev) \ - atomic64_inc(&dev->counters->_x); \ -} -NX842_COUNTER_INC(comp_complete); -NX842_COUNTER_INC(comp_failed); -NX842_COUNTER_INC(decomp_complete); -NX842_COUNTER_INC(decomp_failed); -NX842_COUNTER_INC(swdecomp); - -#define NX842_HIST_SLOTS 16 - -static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) -{ - int bucket = fls(time); - - if (bucket) - bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); - - atomic64_inc(×[bucket]); -} - -/* NX unit operation flags */ -#define NX842_OP_COMPRESS 0x0 -#define NX842_OP_CRC 0x1 -#define NX842_OP_DECOMPRESS 0x2 -#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) -#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) -#define NX842_OP_ASYNC (1<<23) -#define NX842_OP_NOTIFY (1<<22) -#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) - -static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) -{ - /* No use of DMA mappings within the driver. */ - return 0; -} - -struct nx842_slentry { - unsigned long ptr; /* Real address (use __pa()) */ - unsigned long len; -}; - -/* pHyp scatterlist entry */ -struct nx842_scatterlist { - int entry_nr; /* number of slentries */ - struct nx842_slentry *entries; /* ptr to array of slentries */ -}; - -/* Does not include sizeof(entry_nr) in the size */ -static inline unsigned long nx842_get_scatterlist_size( - struct nx842_scatterlist *sl) -{ - return sl->entry_nr * sizeof(struct nx842_slentry); -} - -static inline unsigned long nx842_get_pa(void *addr) -{ - if (is_vmalloc_addr(addr)) - return page_to_phys(vmalloc_to_page(addr)) - + offset_in_page(addr); - else - return __pa(addr); -} - -static int nx842_build_scatterlist(unsigned long buf, int len, - struct nx842_scatterlist *sl) -{ - unsigned long nextpage; - struct nx842_slentry *entry; - - sl->entry_nr = 0; - - entry = sl->entries; - while (len) { - entry->ptr = nx842_get_pa((void *)buf); - nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); - if (nextpage < buf + len) { - /* we aren't at the end yet */ - if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) - /* we are in the middle (or beginning) */ - entry->len = NX842_HW_PAGE_SIZE; - else - /* we are at the beginning */ - entry->len = nextpage - buf; - } else { - /* at the end */ - entry->len = len; - } - - len -= entry->len; - buf += entry->len; - sl->entry_nr++; - entry++; - } - - return 0; -} - -/* - * Working memory for software decompression - */ -struct sw842_fifo { - union { - char f8[256][8]; - char f4[512][4]; - }; - char f2[256][2]; - unsigned char f84_full; - unsigned char f2_full; - unsigned char f8_count; - unsigned char f2_count; - unsigned int f4_count; -}; - -/* - * Working memory for crypto API +/** + * nx842_constraints + * + * This provides the driver's constraints. Different nx842 implementations + * may have varying requirements. The constraints are: + * @alignment: All buffers should be aligned to this + * @multiple: All buffer lengths should be a multiple of this + * @minimum: Buffer lengths must not be less than this amount + * @maximum: Buffer lengths must not be more than this amount + * + * The constraints apply to all buffers and lengths, both input and output, + * for both compression and decompression, except for the minimum which + * only applies to compression input and decompression output; the + * compressed data can be less than the minimum constraint. It can be + * assumed that compressed data will always adhere to the multiple + * constraint. + * + * The driver may succeed even if these constraints are violated; + * however the driver can return failure or suffer reduced performance + * if any constraint is not met. */ -struct nx842_workmem { - char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ - union { - /* hardware working memory */ - struct { - /* scatterlist */ - char slin[SIZE_4K]; - char slout[SIZE_4K]; - /* coprocessor status/parameter block */ - struct nx_csbcpb csbcpb; - }; - /* software working memory */ - struct sw842_fifo swfifo; /* software decompression fifo */ - }; -}; - -int nx842_get_workmem_size(void) -{ - return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; -} -EXPORT_SYMBOL_GPL(nx842_get_workmem_size); - -int nx842_get_workmem_size_aligned(void) -{ - return sizeof(struct nx842_workmem); -} -EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); - -static int nx842_validate_result(struct device *dev, - struct cop_status_block *csb) +int nx842_constraints(struct nx842_constraints *c) { - /* The csb must be valid after returning from vio_h_cop_sync */ - if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { - dev_err(dev, "%s: cspcbp not valid upon completion.\n", - __func__); - dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", - csb->valid, - csb->crb_seq_number, - csb->completion_code, - csb->completion_extension); - dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", - csb->processed_byte_count, - (unsigned long)csb->address); - return -EIO; - } - - /* Check return values from the hardware in the CSB */ - switch (csb->completion_code) { - case 0: /* Completed without error */ - break; - case 64: /* Target bytes > Source bytes during compression */ - case 13: /* Output buffer too small */ - dev_dbg(dev, "%s: Compression output larger than input\n", - __func__); - return -ENOSPC; - case 66: /* Input data contains an illegal template field */ - case 67: /* Template indicates data past the end of the input stream */ - dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", - __func__, csb->completion_code); - return -EINVAL; - default: - dev_dbg(dev, "%s: Unspecified error (code:%d)\n", - __func__, csb->completion_code); - return -EIO; - } - - /* Hardware sanity check */ - if (!NX842_CSBCPB_CE2(csb->completion_extension)) { - dev_err(dev, "%s: No error returned by hardware, but " - "data returned is unusable, contact support.\n" - "(Additional info: csbcbp->processed bytes " - "does not specify processed bytes for the " - "target buffer.)\n", __func__); - return -EIO; - } - + memcpy(c, nx842_platform_driver()->constraints, sizeof(*c)); return 0; } +EXPORT_SYMBOL_GPL(nx842_constraints); /** - * nx842_compress - Compress data using the 842 algorithm - * - * Compression provide by the NX842 coprocessor on IBM Power systems. - * The input buffer is compressed and the result is stored in the - * provided output buffer. - * - * Upon return from this function @outlen contains the length of the - * compressed data. If there is an error then @outlen will be 0 and an - * error will be specified by the return code from this function. - * - * @in: Pointer to input buffer, must be page aligned - * @inlen: Length of input buffer, must be PAGE_SIZE - * @out: Pointer to output buffer - * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_get_workmem_size() + * nx842_workmem_size * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EMSGSIZE XXX Difficult to describe this limitation - * -EIO Internal error - * -ENODEV Hardware unavailable + * Get the amount of working memory the driver requires. */ -int nx842_compress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, void *wmem) +size_t nx842_workmem_size(void) { - struct nx842_header *hdr; - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; - unsigned long inbuf, outbuf, padding; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start_time = get_tb(); - - /* - * Make sure input buffer is 64k page aligned. This is assumed since - * this driver is designed for page compression only (for now). This - * is very nice since we can now use direct DDE(s) for the input and - * the alignment is guaranteed. - */ - inbuf = (unsigned long)in; - if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata || !local_devdata->dev) { - rcu_read_unlock(); - return -ENODEV; - } - max_sync_size = local_devdata->max_sync_size; - dev = local_devdata->dev; - - /* Create the header */ - hdr = (struct nx842_header *)out; - hdr->blocks_nr = PAGE_SIZE / max_sync_size; - hdrsize = nx842_header_size(hdr); - outbuf = (unsigned long)out + hdrsize; - bytesleft = *outlen - hdrsize; - - /* Init scatterlist */ - workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, - NX842_HW_PAGE_SIZE); - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = (struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_COMPRESS; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - op.out = nx842_get_pa(slout.entries); - - for (i = 0; i < hdr->blocks_nr; i++) { - /* - * Aligning the output blocks to 128 bytes does waste space, - * but it prevents the need for bounce buffers and memory - * copies. It also simplifies the code a lot. In the worst - * case (64k page, 4k max_sync_size), you lose up to - * (128*16)/64k = ~3% the compression factor. For 64k - * max_sync_size, the loss would be at most 128/64k = ~0.2%. - */ - padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; - outbuf += padding; - bytesleft -= padding; - if (i == 0) - /* save offset into first block in header */ - hdr->offset = padding + hdrsize; - - if (bytesleft <= 0) { - ret = -ENOSPC; - goto unlock; - } - - /* - * NOTE: If the default max_sync_size is changed from 4k - * to 64k, remove the "likely" case below, since a - * scatterlist will always be needed. - */ - if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = max_sync_size; - - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, max_sync_size, &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } - - /* - * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect - * DDE is required for the outbuf. - * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must - * also be page aligned (1 in 128/4k=32 chance) in order - * to use a direct DDE. - * This is unlikely, just use an indirect DDE always. - */ - nx842_build_scatterlist(outbuf, - min(bytesleft, max_sync_size), &slout); - /* op.out set before loop */ - op.outlen = -nx842_get_scatterlist_size(&slout); - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - ret = -EIO; - goto unlock; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret && ret != -ENOSPC) - goto unlock; - - /* Handle incompressible data */ - if (unlikely(ret == -ENOSPC)) { - if (bytesleft < max_sync_size) { - /* - * Not enough space left in the output buffer - * to store uncompressed block - */ - goto unlock; - } else { - /* Store incompressible block */ - memcpy((void *)outbuf, (void *)inbuf, - max_sync_size); - hdr->sizes[i] = -max_sync_size; - outbuf += max_sync_size; - bytesleft -= max_sync_size; - /* Reset ret, incompressible data handled */ - ret = 0; - } - } else { - /* Normal case, compression was successful */ - size = csbcpb->csb.processed_byte_count; - dev_dbg(dev, "%s: processed_bytes=%d\n", - __func__, size); - hdr->sizes[i] = size; - outbuf += size; - bytesleft -= size; - } - - inbuf += max_sync_size; - } - - *outlen = (unsigned int)(outbuf - (unsigned long)out); - -unlock: - if (ret) - nx842_inc_comp_failed(local_devdata); - else { - nx842_inc_comp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->comp_times, - (get_tb() - start_time) / tb_ticks_per_usec); - } - rcu_read_unlock(); - return ret; + return nx842_platform_driver()->workmem_size; } -EXPORT_SYMBOL_GPL(nx842_compress); - -static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, - const void *); +EXPORT_SYMBOL_GPL(nx842_workmem_size); -/** - * nx842_decompress - Decompress data using the 842 algorithm - * - * Decompression provide by the NX842 coprocessor on IBM Power systems. - * The input buffer is decompressed and the result is stored in the - * provided output buffer. The size allocated to the output buffer is - * provided by the caller of this function in @outlen. Upon return from - * this function @outlen contains the length of the decompressed data. - * If there is an error then @outlen will be 0 and an error will be - * specified by the return code from this function. - * - * @in: Pointer to input buffer, will use bounce buffer if not 128 byte - * aligned - * @inlen: Length of input buffer - * @out: Pointer to output buffer, must be page aligned - * @outlen: Length of output buffer, must be PAGE_SIZE - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_get_workmem_size() - * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENODEV Hardware decompression device is unavailable - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EINVAL Bad input data encountered when attempting decompress - * -EIO Internal error - */ -int nx842_decompress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, void *wmem) +int nx842_compress(const unsigned char *in, unsigned int ilen, + unsigned char *out, unsigned int *olen, void *wmem) { - struct nx842_header *hdr; - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0, i, size, max_sync_size; - unsigned long inbuf, outbuf; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start_time = get_tb(); - - /* Ensure page alignment and size */ - outbuf = (unsigned long)out; - if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (local_devdata) - dev = local_devdata->dev; - - /* Get header */ - hdr = (struct nx842_header *)in; - - workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, - NX842_HW_PAGE_SIZE); - - inbuf = (unsigned long)in + hdr->offset; - if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { - /* Copy block(s) into bounce buffer for alignment */ - memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); - inbuf = (unsigned long)workmem->bounce; - } - - /* Init scatterlist */ - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = (struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_DECOMPRESS; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - - /* - * max_sync_size may have changed since compression, - * so we can't read it from the device info. We need - * to derive it from hdr->blocks_nr. - */ - max_sync_size = PAGE_SIZE / hdr->blocks_nr; - - for (i = 0; i < hdr->blocks_nr; i++) { - /* Skip padding */ - inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); - - if (hdr->sizes[i] < 0) { - /* Negative sizes indicate uncompressed data blocks */ - size = abs(hdr->sizes[i]); - memcpy((void *)outbuf, (void *)inbuf, size); - outbuf += size; - inbuf += size; - continue; - } - - if (!dev) - goto sw; - - /* - * The better the compression, the more likely the "likely" - * case becomes. - */ - if (likely((inbuf & NX842_HW_PAGE_MASK) == - ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = hdr->sizes[i]; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } - - /* - * NOTE: If the default max_sync_size is changed from 4k - * to 64k, remove the "likely" case below, since a - * scatterlist will always be needed. - */ - if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { - /* Create direct DDE */ - op.out = nx842_get_pa((void *)outbuf); - op.outlen = max_sync_size; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(outbuf, max_sync_size, &slout); - op.out = nx842_get_pa(slout.entries); - op.outlen = -nx842_get_scatterlist_size(&slout); - } - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - dev = NULL; - goto sw; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret) { - dev = NULL; - goto sw; - } - - /* HW decompression success */ - inbuf += hdr->sizes[i]; - outbuf += csbcpb->csb.processed_byte_count; - continue; - -sw: - /* software decompression */ - size = max_sync_size; - ret = sw842_decompress( - (unsigned char *)inbuf, hdr->sizes[i], - (unsigned char *)outbuf, &size, wmem); - if (ret) - pr_debug("%s: sw842_decompress failed with %d\n", - __func__, ret); - - if (ret) { - if (ret != -ENOSPC && ret != -EINVAL && - ret != -EMSGSIZE) - ret = -EIO; - goto unlock; - } - - /* SW decompression success */ - inbuf += hdr->sizes[i]; - outbuf += size; - } - - *outlen = (unsigned int)(outbuf - (unsigned long)out); - -unlock: - if (ret) - /* decompress fail */ - nx842_inc_decomp_failed(local_devdata); - else { - if (!dev) - /* software decompress */ - nx842_inc_swdecomp(local_devdata); - nx842_inc_decomp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->decomp_times, - (get_tb() - start_time) / tb_ticks_per_usec); - } - - rcu_read_unlock(); - return ret; + return nx842_platform_driver()->compress(in, ilen, out, olen, wmem); } -EXPORT_SYMBOL_GPL(nx842_decompress); +EXPORT_SYMBOL_GPL(nx842_compress); -/** - * nx842_OF_set_defaults -- Set default (disabled) values for devdata - * - * @devdata - struct nx842_devdata to update - * - * Returns: - * 0 on success - * -ENOENT if @devdata ptr is NULL - */ -static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +int nx842_decompress(const unsigned char *in, unsigned int ilen, + unsigned char *out, unsigned int *olen, void *wmem) { - if (devdata) { - devdata->max_sync_size = 0; - devdata->max_sync_sg = 0; - devdata->max_sg_len = 0; - devdata->status = UNAVAILABLE; - return 0; - } else - return -ENOENT; -} - -/** - * nx842_OF_upd_status -- Update the device info from OF status prop - * - * The status property indicates if the accelerator is enabled. If the - * device is in the OF tree it indicates that the hardware is present. - * The status field indicates if the device is enabled when the status - * is 'okay'. Otherwise the device driver will be disabled. - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 - Device is available - * -EINVAL - Device is not available - */ -static int nx842_OF_upd_status(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const char *status = (const char *)prop->value; - - if (!strncmp(status, "okay", (size_t)prop->length)) { - devdata->status = AVAILABLE; - } else { - dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", - __func__, status); - devdata->status = UNAVAILABLE; - } - - return ret; -} - -/** - * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop - * - * Definition of the 'ibm,max-sg-len' OF property: - * This field indicates the maximum byte length of a scatter list - * for the platform facility. It is a single cell encoded as with encode-int. - * - * Example: - * # od -x ibm,max-sg-len - * 0000000 0000 0ff0 - * - * In this example, the maximum byte length of a scatter list is - * 0x0ff0 (4,080). - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const int *maxsglen = prop->value; - - if (prop->length != sizeof(*maxsglen)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, - prop->length, sizeof(*maxsglen)); - ret = -EINVAL; - } else { - devdata->max_sg_len = (unsigned int)min(*maxsglen, - (int)NX842_HW_PAGE_SIZE); - } - - return ret; -} - -/** - * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop - * - * Definition of the 'ibm,max-sync-cop' OF property: - * Two series of cells. The first series of cells represents the maximums - * that can be synchronously compressed. The second series of cells - * represents the maximums that can be synchronously decompressed. - * 1. The first cell in each series contains the count of the number of - * data length, scatter list elements pairs that follow – each being - * of the form - * a. One cell data byte length - * b. One cell total number of scatter list elements - * - * Example: - * # od -x ibm,max-sync-cop - * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 - * 0000020 0000 1000 0000 01fe - * - * In this example, compression supports 0x1000 (4,096) data byte length - * and 0x1fe (510) total scatter list elements. Decompression supports - * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list - * elements. - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const struct maxsynccop_t { - int comp_elements; - int comp_data_limit; - int comp_sg_limit; - int decomp_elements; - int decomp_data_limit; - int decomp_sg_limit; - } *maxsynccop; - - if (prop->length != sizeof(*maxsynccop)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, - sizeof(*maxsynccop)); - ret = -EINVAL; - goto out; - } - - maxsynccop = (const struct maxsynccop_t *)prop->value; - - /* Use one limit rather than separate limits for compression and - * decompression. Set a maximum for this so as not to exceed the - * size that the header can support and round the value down to - * the hardware page size (4K) */ - devdata->max_sync_size = - (unsigned int)min(maxsynccop->comp_data_limit, - maxsynccop->decomp_data_limit); - - devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, - SIZE_64K); - - if (devdata->max_sync_size < SIZE_4K) { - dev_err(devdata->dev, "%s: hardware max data size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_size); - ret = -EINVAL; - goto out; - } - - devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, - maxsynccop->decomp_sg_limit); - if (devdata->max_sync_sg < 1) { - dev_err(devdata->dev, "%s: hardware max sg size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_sg); - ret = -EINVAL; - goto out; - } - -out: - return ret; + return nx842_platform_driver()->decompress(in, ilen, out, olen, wmem); } +EXPORT_SYMBOL_GPL(nx842_decompress); -/** - * - * nx842_OF_upd -- Handle OF properties updates for the device. - * - * Set all properties from the OF tree. Optionally, a new property - * can be provided by the @new_prop pointer to overwrite an existing value. - * The device will remain disabled until all values are valid, this function - * will return an error for updates unless all values are valid. - * - * @new_prop: If not NULL, this property is being updated. If NULL, update - * all properties from the current values in the OF tree. - * - * Returns: - * 0 - Success - * -ENOMEM - Could not allocate memory for new devdata structure - * -EINVAL - property value not found, new_prop is not a recognized - * property for the device or property value is not valid. - * -ENODEV - Device is not available - */ -static int nx842_OF_upd(struct property *new_prop) +static __init int nx842_init(void) { - struct nx842_devdata *old_devdata = NULL; - struct nx842_devdata *new_devdata = NULL; - struct device_node *of_node = NULL; - struct property *status = NULL; - struct property *maxsglen = NULL; - struct property *maxsyncop = NULL; - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - if (old_devdata) - of_node = old_devdata->dev->of_node; + request_module("nx-compress-powernv"); + request_module("nx-compress-pseries"); - if (!old_devdata || !of_node) { - pr_err("%s: device is not available\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); - return -ENODEV; - } - - new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); - if (!new_devdata) { - dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); - ret = -ENOMEM; - goto error_out; - } - - memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); - new_devdata->counters = old_devdata->counters; - - /* Set ptrs for existing properties */ - status = of_find_property(of_node, "status", NULL); - maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); - maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); - if (!status || !maxsglen || !maxsyncop) { - dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); - ret = -EINVAL; - goto error_out; - } - - /* - * If this is a property update, there are only certain properties that - * we care about. Bail if it isn't in the below list + /* we prevent loading if there's no platform driver, and we get the + * module that set it so it won't unload, so we don't need to check + * if it's set in any of the above functions */ - if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) - goto out; - - /* Perform property updates */ - ret = nx842_OF_upd_status(new_devdata, status); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); - if (ret) - goto error_out; - -out: - dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", - __func__, new_devdata->max_sync_size, - old_devdata->max_sync_size); - dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", - __func__, new_devdata->max_sync_sg, - old_devdata->max_sync_sg); - dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", - __func__, new_devdata->max_sg_len, - old_devdata->max_sg_len); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - return 0; - -error_out: - if (new_devdata) { - dev_info(old_devdata->dev, "%s: device disabled\n", __func__); - nx842_OF_set_defaults(new_devdata); - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - } else { - dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); - } - - if (!ret) - ret = -EINVAL; - return ret; -} - -/** - * nx842_OF_notifier - Process updates to OF properties for the device - * - * @np: notifier block - * @action: notifier action - * @update: struct pSeries_reconfig_prop_update pointer if action is - * PSERIES_UPDATE_PROPERTY - * - * Returns: - * NOTIFY_OK on success - * NOTIFY_BAD encoded with error number on failure, use - * notifier_to_errno() to decode this value - */ -static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, - void *data) -{ - struct of_reconfig_data *upd = data; - struct nx842_devdata *local_devdata; - struct device_node *node = NULL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (local_devdata) - node = local_devdata->dev->of_node; - - if (local_devdata && - action == OF_RECONFIG_UPDATE_PROPERTY && - !strcmp(upd->dn->name, node->name)) { - rcu_read_unlock(); - nx842_OF_upd(upd->prop); - } else - rcu_read_unlock(); - - return NOTIFY_OK; -} - -static struct notifier_block nx842_of_nb = { - .notifier_call = nx842_OF_notifier, -}; - -#define nx842_counter_read(_name) \ -static ssize_t nx842_##_name##_show(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) { \ - struct nx842_devdata *local_devdata; \ - int p = 0; \ - rcu_read_lock(); \ - local_devdata = rcu_dereference(devdata); \ - if (local_devdata) \ - p = snprintf(buf, PAGE_SIZE, "%ld\n", \ - atomic64_read(&local_devdata->counters->_name)); \ - rcu_read_unlock(); \ - return p; \ -} - -#define NX842DEV_COUNTER_ATTR_RO(_name) \ - nx842_counter_read(_name); \ - static struct device_attribute dev_attr_##_name = __ATTR(_name, \ - 0444, \ - nx842_##_name##_show,\ - NULL); - -NX842DEV_COUNTER_ATTR_RO(comp_complete); -NX842DEV_COUNTER_ATTR_RO(comp_failed); -NX842DEV_COUNTER_ATTR_RO(decomp_complete); -NX842DEV_COUNTER_ATTR_RO(decomp_failed); -NX842DEV_COUNTER_ATTR_RO(swdecomp); - -static ssize_t nx842_timehist_show(struct device *, - struct device_attribute *, char *); - -static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, - nx842_timehist_show, NULL); -static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, - 0444, nx842_timehist_show, NULL); - -static ssize_t nx842_timehist_show(struct device *dev, - struct device_attribute *attr, char *buf) { - char *p = buf; - struct nx842_devdata *local_devdata; - atomic64_t *times; - int bytes_remain = PAGE_SIZE; - int bytes; - int i; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata) { - rcu_read_unlock(); - return 0; - } - - if (attr == &dev_attr_comp_times) - times = local_devdata->counters->comp_times; - else if (attr == &dev_attr_decomp_times) - times = local_devdata->counters->decomp_times; - else { - rcu_read_unlock(); - return 0; - } - - for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { - bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", - i ? (2<<(i-1)) : 0, (2<<i)-1, - atomic64_read(×[i])); - bytes_remain -= bytes; - p += bytes; - } - /* The last bucket holds everything over - * 2<<(NX842_HIST_SLOTS - 2) us */ - bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", - 2<<(NX842_HIST_SLOTS - 2), - atomic64_read(×[(NX842_HIST_SLOTS - 1)])); - p += bytes; - - rcu_read_unlock(); - return p - buf; -} - -static struct attribute *nx842_sysfs_entries[] = { - &dev_attr_comp_complete.attr, - &dev_attr_comp_failed.attr, - &dev_attr_decomp_complete.attr, - &dev_attr_decomp_failed.attr, - &dev_attr_swdecomp.attr, - &dev_attr_comp_times.attr, - &dev_attr_decomp_times.attr, - NULL, -}; - -static struct attribute_group nx842_attribute_group = { - .name = NULL, /* put in device directory */ - .attrs = nx842_sysfs_entries, -}; - -static int __init nx842_probe(struct vio_dev *viodev, - const struct vio_device_id *id) -{ - struct nx842_devdata *old_devdata, *new_devdata = NULL; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - - if (old_devdata && old_devdata->vdev != NULL) { - dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); - ret = -1; - goto error_unlock; - } - - dev_set_drvdata(&viodev->dev, NULL); - - new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); - if (!new_devdata) { - dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); - ret = -ENOMEM; - goto error_unlock; - } - - new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), - GFP_NOFS); - if (!new_devdata->counters) { - dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); - ret = -ENOMEM; - goto error_unlock; - } - - new_devdata->vdev = viodev; - new_devdata->dev = &viodev->dev; - nx842_OF_set_defaults(new_devdata); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - kfree(old_devdata); - - of_reconfig_notifier_register(&nx842_of_nb); - - ret = nx842_OF_upd(NULL); - if (ret && ret != -ENODEV) { - dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); - ret = -1; - goto error; - } - - rcu_read_lock(); - dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); - rcu_read_unlock(); - - if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { - dev_err(&viodev->dev, "could not create sysfs device attributes\n"); - ret = -1; - goto error; + if (!nx842_platform_driver_get()) { + pr_err("no nx842 driver found.\n"); + return -ENODEV; } return 0; - -error_unlock: - spin_unlock_irqrestore(&devdata_mutex, flags); - if (new_devdata) - kfree(new_devdata->counters); - kfree(new_devdata); -error: - return ret; -} - -static int __exit nx842_remove(struct vio_dev *viodev) -{ - struct nx842_devdata *old_devdata; - unsigned long flags; - - pr_info("Removing IBM Power 842 compression device\n"); - sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - of_reconfig_notifier_unregister(&nx842_of_nb); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(&viodev->dev, NULL); - if (old_devdata) - kfree(old_devdata->counters); - kfree(old_devdata); - return 0; -} - -static struct vio_device_id nx842_driver_ids[] = { - {"ibm,compression-v1", "ibm,compression"}, - {"", ""}, -}; - -static struct vio_driver nx842_driver = { - .name = MODULE_NAME, - .probe = nx842_probe, - .remove = __exit_p(nx842_remove), - .get_desired_dma = nx842_get_desired_dma, - .id_table = nx842_driver_ids, -}; - -static int __init nx842_init(void) -{ - struct nx842_devdata *new_devdata; - pr_info("Registering IBM Power 842 compression driver\n"); - - RCU_INIT_POINTER(devdata, NULL); - new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); - if (!new_devdata) { - pr_err("Could not allocate memory for device data\n"); - return -ENOMEM; - } - new_devdata->status = UNAVAILABLE; - RCU_INIT_POINTER(devdata, new_devdata); - - return vio_register_driver(&nx842_driver); } - module_init(nx842_init); static void __exit nx842_exit(void) { - struct nx842_devdata *old_devdata; - unsigned long flags; - - pr_info("Exiting IBM Power 842 compression driver\n"); - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - if (old_devdata) - dev_set_drvdata(old_devdata->dev, NULL); - kfree(old_devdata); - vio_unregister_driver(&nx842_driver); + nx842_platform_driver_put(); } - module_exit(nx842_exit); - -/********************************* - * 842 software decompressor -*********************************/ -typedef int (*sw842_template_op)(const char **, int *, unsigned char **, - struct sw842_fifo *); - -static int sw842_data8(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_data4(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_data2(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr8(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr4(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr2(const char **, int *, unsigned char **, - struct sw842_fifo *); - -/* special templates */ -#define SW842_TMPL_REPEAT 0x1B -#define SW842_TMPL_ZEROS 0x1C -#define SW842_TMPL_EOF 0x1E - -static sw842_template_op sw842_tmpl_ops[26][4] = { - { sw842_data8, NULL}, /* 0 (00000) */ - { sw842_data4, sw842_data2, sw842_ptr2, NULL}, - { sw842_data4, sw842_ptr2, sw842_data2, NULL}, - { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, - { sw842_data4, sw842_ptr4, NULL}, - { sw842_data2, sw842_ptr2, sw842_data4, NULL}, - { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, - { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, - { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, - { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, - { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ - { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, - { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, - { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, - { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, - { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, - { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, - { sw842_ptr4, sw842_data4, NULL}, /* 20 (10100) */ - { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, - { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, - { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, - { sw842_ptr4, sw842_ptr4, NULL}, - { sw842_ptr8, NULL} -}; - -/* Software decompress helpers */ - -static uint8_t sw842_get_byte(const char *buf, int bit) -{ - uint8_t tmpl; - uint16_t tmp; - tmp = htons(*(uint16_t *)(buf)); - tmp = (uint16_t)(tmp << bit); - tmp = ntohs(tmp); - memcpy(&tmpl, &tmp, 1); - return tmpl; -} - -static uint8_t sw842_get_template(const char **buf, int *bit) -{ - uint8_t byte; - byte = sw842_get_byte(*buf, *bit); - byte = byte >> 3; - byte &= 0x1F; - *buf += (*bit + 5) / 8; - *bit = (*bit + 5) % 8; - return byte; -} - -/* repeat_count happens to be 5-bit too (like the template) */ -static uint8_t sw842_get_repeat_count(const char **buf, int *bit) -{ - uint8_t byte; - byte = sw842_get_byte(*buf, *bit); - byte = byte >> 2; - byte &= 0x3F; - *buf += (*bit + 6) / 8; - *bit = (*bit + 6) % 8; - return byte; -} - -static uint8_t sw842_get_ptr2(const char **buf, int *bit) -{ - uint8_t ptr; - ptr = sw842_get_byte(*buf, *bit); - (*buf)++; - return ptr; -} - -static uint16_t sw842_get_ptr4(const char **buf, int *bit, - struct sw842_fifo *fifo) -{ - uint16_t ptr; - ptr = htons(*(uint16_t *)(*buf)); - ptr = (uint16_t)(ptr << *bit); - ptr = ptr >> 7; - ptr &= 0x01FF; - *buf += (*bit + 9) / 8; - *bit = (*bit + 9) % 8; - return ptr; -} - -static uint8_t sw842_get_ptr8(const char **buf, int *bit, - struct sw842_fifo *fifo) -{ - return sw842_get_ptr2(buf, bit); -} - -/* Software decompress template ops */ - -static int sw842_data8(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - int ret; - - ret = sw842_data4(inbuf, inbit, outbuf, fifo); - if (ret) - return ret; - ret = sw842_data4(inbuf, inbit, outbuf, fifo); - return ret; -} - -static int sw842_data4(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - int ret; - - ret = sw842_data2(inbuf, inbit, outbuf, fifo); - if (ret) - return ret; - ret = sw842_data2(inbuf, inbit, outbuf, fifo); - return ret; -} - -static int sw842_data2(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - **outbuf = sw842_get_byte(*inbuf, *inbit); - (*inbuf)++; - (*outbuf)++; - **outbuf = sw842_get_byte(*inbuf, *inbit); - (*inbuf)++; - (*outbuf)++; - return 0; -} - -static int sw842_ptr8(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint8_t ptr; - ptr = sw842_get_ptr8(inbuf, inbit, fifo); - if (!fifo->f84_full && (ptr >= fifo->f8_count)) - return 1; - memcpy(*outbuf, fifo->f8[ptr], 8); - *outbuf += 8; - return 0; -} - -static int sw842_ptr4(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint16_t ptr; - ptr = sw842_get_ptr4(inbuf, inbit, fifo); - if (!fifo->f84_full && (ptr >= fifo->f4_count)) - return 1; - memcpy(*outbuf, fifo->f4[ptr], 4); - *outbuf += 4; - return 0; -} - -static int sw842_ptr2(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint8_t ptr; - ptr = sw842_get_ptr2(inbuf, inbit); - if (!fifo->f2_full && (ptr >= fifo->f2_count)) - return 1; - memcpy(*outbuf, fifo->f2[ptr], 2); - *outbuf += 2; - return 0; -} - -static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo) -{ - unsigned char initial_f2count = fifo->f2_count; - - memcpy(fifo->f8[fifo->f8_count], buf, 8); - fifo->f4_count += 2; - fifo->f8_count += 1; - - if (!fifo->f84_full && fifo->f4_count >= 512) { - fifo->f84_full = 1; - fifo->f4_count /= 512; - } - - memcpy(fifo->f2[fifo->f2_count++], buf, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2); - if (fifo->f2_count < initial_f2count) - fifo->f2_full = 1; -} - -static int sw842_decompress(const unsigned char *src, int srclen, - unsigned char *dst, int *destlen, - const void *wrkmem) -{ - uint8_t tmpl; - const char *inbuf; - int inbit = 0; - unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf; - const char *inbuf_end; - sw842_template_op op; - int opindex; - int i, repeat_count; - struct sw842_fifo *fifo; - int ret = 0; - - fifo = &((struct nx842_workmem *)(wrkmem))->swfifo; - memset(fifo, 0, sizeof(*fifo)); - - origbuf = NULL; - inbuf = src; - inbuf_end = src + srclen; - outbuf = dst; - outbuf_end = dst + *destlen; - - while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) { - if (inbuf >= inbuf_end) { - ret = -EINVAL; - goto out; - } - - opindex = 0; - prevbuf = origbuf; - origbuf = outbuf; - switch (tmpl) { - case SW842_TMPL_REPEAT: - if (prevbuf == NULL) { - ret = -EINVAL; - goto out; - } - - repeat_count = sw842_get_repeat_count(&inbuf, - &inbit) + 1; - - /* Did the repeat count advance past the end of input */ - if (inbuf > inbuf_end) { - ret = -EINVAL; - goto out; - } - - for (i = 0; i < repeat_count; i++) { - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - memcpy(outbuf, prevbuf, 8); - sw842_copy_to_fifo(outbuf, fifo); - outbuf += 8; - } - break; - - case SW842_TMPL_ZEROS: - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - memset(outbuf, 0, 8); - sw842_copy_to_fifo(outbuf, fifo); - outbuf += 8; - break; - - default: - if (tmpl > 25) { - ret = -EINVAL; - goto out; - } - - /* Does this go past the end of the input buffer */ - if ((inbuf + 2) > inbuf_end) { - ret = -EINVAL; - goto out; - } - - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - while (opindex < 4 && - (op = sw842_tmpl_ops[tmpl][opindex++]) - != NULL) { - ret = (*op)(&inbuf, &inbit, &outbuf, fifo); - if (ret) { - ret = -EINVAL; - goto out; - } - sw842_copy_to_fifo(origbuf, fifo); - } - } - } - -out: - if (!ret) - *destlen = (unsigned int)(outbuf - dst); - else - *destlen = 0; - - return ret; -} diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h new file mode 100644 index 0000000..ac0ea79 --- /dev/null +++ b/drivers/crypto/nx/nx-842.h @@ -0,0 +1,144 @@ + +#ifndef __NX_842_H__ +#define __NX_842_H__ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sw842.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/ratelimit.h> + +/* Restrictions on Data Descriptor List (DDL) and Entry (DDE) buffers + * + * From NX P8 workbook, sec 4.9.1 "842 details" + * Each DDE buffer is 128 byte aligned + * Each DDE buffer size is a multiple of 32 bytes (except the last) + * The last DDE buffer size is a multiple of 8 bytes + */ +#define DDE_BUFFER_ALIGN (128) +#define DDE_BUFFER_SIZE_MULT (32) +#define DDE_BUFFER_LAST_MULT (8) + +/* Arbitrary DDL length limit + * Allows max buffer size of MAX-1 to MAX pages + * (depending on alignment) + */ +#define DDL_LEN_MAX (17) + +/* CCW 842 CI/FC masks + * NX P8 workbook, section 4.3.1, figure 4-6 + * "CI/FC Boundary by NX CT type" + */ +#define CCW_CI_842 (0x00003ff8) +#define CCW_FC_842 (0x00000007) + +/* CCW Function Codes (FC) for 842 + * NX P8 workbook, section 4.9, table 4-28 + * "Function Code Definitions for 842 Memory Compression" + */ +#define CCW_FC_842_COMP_NOCRC (0) +#define CCW_FC_842_COMP_CRC (1) +#define CCW_FC_842_DECOMP_NOCRC (2) +#define CCW_FC_842_DECOMP_CRC (3) +#define CCW_FC_842_MOVE (4) + +/* CSB CC Error Types for 842 + * NX P8 workbook, section 4.10.3, table 4-30 + * "Reported Error Types Summary Table" + */ +/* These are all duplicates of existing codes defined in icswx.h. */ +#define CSB_CC_TRANSLATION_DUP1 (80) +#define CSB_CC_TRANSLATION_DUP2 (82) +#define CSB_CC_TRANSLATION_DUP3 (84) +#define CSB_CC_TRANSLATION_DUP4 (86) +#define CSB_CC_TRANSLATION_DUP5 (92) +#define CSB_CC_TRANSLATION_DUP6 (94) +#define CSB_CC_PROTECTION_DUP1 (81) +#define CSB_CC_PROTECTION_DUP2 (83) +#define CSB_CC_PROTECTION_DUP3 (85) +#define CSB_CC_PROTECTION_DUP4 (87) +#define CSB_CC_PROTECTION_DUP5 (93) +#define CSB_CC_PROTECTION_DUP6 (95) +#define CSB_CC_RD_EXTERNAL_DUP1 (89) +#define CSB_CC_RD_EXTERNAL_DUP2 (90) +#define CSB_CC_RD_EXTERNAL_DUP3 (91) +/* These are specific to NX */ +/* 842 codes */ +#define CSB_CC_TPBC_GT_SPBC (64) /* no error, but >1 comp ratio */ +#define CSB_CC_CRC_MISMATCH (65) /* decomp crc mismatch */ +#define CSB_CC_TEMPL_INVALID (66) /* decomp invalid template value */ +#define CSB_CC_TEMPL_OVERFLOW (67) /* decomp template shows data after end */ +/* sym crypt codes */ +#define CSB_CC_DECRYPT_OVERFLOW (64) +/* asym crypt codes */ +#define CSB_CC_MINV_OVERFLOW (128) +/* These are reserved for hypervisor use */ +#define CSB_CC_HYP_RESERVE_START (240) +#define CSB_CC_HYP_RESERVE_END (253) +#define CSB_CC_HYP_NO_HW (254) +#define CSB_CC_HYP_HANG_ABORTED (255) + +/* CCB Completion Modes (CM) for 842 + * NX P8 workbook, section 4.3, figure 4-5 + * "CRB Details - Normal Cop_Req (CL=00, C=1)" + */ +#define CCB_CM_EXTRA_WRITE (CCB_CM0_ALL_COMPLETIONS & CCB_CM12_STORE) +#define CCB_CM_INTERRUPT (CCB_CM0_ALL_COMPLETIONS & CCB_CM12_INTERRUPT) + +#define LEN_ON_SIZE(pa, size) ((size) - ((pa) & ((size) - 1))) +#define LEN_ON_PAGE(pa) LEN_ON_SIZE(pa, PAGE_SIZE) + +static inline unsigned long nx842_get_pa(void *addr) +{ + if (!is_vmalloc_addr(addr)) + return __pa(addr); + + return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr); +} + +/* Get/Set bit fields */ +#define MASK_LSH(m) (__builtin_ffsl(m) - 1) +#define GET_FIELD(v, m) (((v) & (m)) >> MASK_LSH(m)) +#define SET_FIELD(v, m, val) (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m))) + +struct nx842_constraints { + int alignment; + int multiple; + int minimum; + int maximum; +}; + +struct nx842_driver { + char *name; + struct module *owner; + size_t workmem_size; + + struct nx842_constraints *constraints; + + int (*compress)(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem); + int (*decompress)(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem); +}; + +struct nx842_driver *nx842_platform_driver(void); +bool nx842_platform_driver_set(struct nx842_driver *driver); +void nx842_platform_driver_unset(struct nx842_driver *driver); +bool nx842_platform_driver_get(void); +void nx842_platform_driver_put(void); + +size_t nx842_workmem_size(void); + +int nx842_constraints(struct nx842_constraints *constraints); + +int nx842_compress(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, void *wrkmem); +int nx842_decompress(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, void *wrkmem); + +#endif /* __NX_842_H__ */ diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 88c5624..08ac6d4 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -93,17 +93,6 @@ out: return rc; } -static int gcm_aes_nx_setauthsize(struct crypto_aead *tfm, - unsigned int authsize) -{ - if (authsize > crypto_aead_alg(tfm)->maxauthsize) - return -EINVAL; - - crypto_aead_crt(tfm)->authsize = authsize; - - return 0; -} - static int gcm4106_aes_nx_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { @@ -116,8 +105,6 @@ static int gcm4106_aes_nx_setauthsize(struct crypto_aead *tfm, return -EINVAL; } - crypto_aead_crt(tfm)->authsize = authsize; - return 0; } @@ -134,7 +121,7 @@ static int nx_gca(struct nx_crypto_ctx *nx_ctx, unsigned int max_sg_len; if (nbytes <= AES_BLOCK_SIZE) { - scatterwalk_start(&walk, req->assoc); + scatterwalk_start(&walk, req->src); scatterwalk_copychunks(out, &walk, nbytes, SCATTERWALK_FROM_SG); scatterwalk_done(&walk, SCATTERWALK_FROM_SG, 0); return 0; @@ -159,7 +146,7 @@ static int nx_gca(struct nx_crypto_ctx *nx_ctx, NX_PAGE_SIZE * (max_sg_len - 1)); nx_sg = nx_walk_and_build(nx_ctx->in_sg, max_sg_len, - req->assoc, processed, &to_process); + req->src, processed, &to_process); if ((to_process + processed) < nbytes) NX_CPB_FDM(csbcpb_aead) |= NX_FDM_INTERMEDIATE; @@ -225,7 +212,7 @@ static int gmac(struct aead_request *req, struct blkcipher_desc *desc) NX_PAGE_SIZE * (max_sg_len - 1)); nx_sg = nx_walk_and_build(nx_ctx->in_sg, max_sg_len, - req->assoc, processed, &to_process); + req->src, processed, &to_process); if ((to_process + processed) < nbytes) NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; @@ -377,7 +364,8 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) csbcpb->cpb.aes_gcm.bit_length_data = nbytes * 8; desc.tfm = (struct crypto_blkcipher *) req->base.tfm; rc = nx_build_sg_lists(nx_ctx, &desc, req->dst, - req->src, &to_process, processed, + req->src, &to_process, + processed + req->assoclen, csbcpb->cpb.aes_gcm.iv_or_cnt); if (rc) @@ -412,17 +400,19 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc) mac: if (enc) { /* copy out the auth tag */ - scatterwalk_map_and_copy(csbcpb->cpb.aes_gcm.out_pat_or_mac, - req->dst, nbytes, - crypto_aead_authsize(crypto_aead_reqtfm(req)), - SCATTERWALK_TO_SG); + scatterwalk_map_and_copy( + csbcpb->cpb.aes_gcm.out_pat_or_mac, + req->dst, req->assoclen + nbytes, + crypto_aead_authsize(crypto_aead_reqtfm(req)), + SCATTERWALK_TO_SG); } else { u8 *itag = nx_ctx->priv.gcm.iauth_tag; u8 *otag = csbcpb->cpb.aes_gcm.out_pat_or_mac; - scatterwalk_map_and_copy(itag, req->src, nbytes, - crypto_aead_authsize(crypto_aead_reqtfm(req)), - SCATTERWALK_FROM_SG); + scatterwalk_map_and_copy( + itag, req->src, req->assoclen + nbytes, + crypto_aead_authsize(crypto_aead_reqtfm(req)), + SCATTERWALK_FROM_SG); rc = memcmp(itag, otag, crypto_aead_authsize(crypto_aead_reqtfm(req))) ? -EBADMSG : 0; @@ -481,45 +471,39 @@ static int gcm4106_aes_nx_decrypt(struct aead_request *req) * during encrypt/decrypt doesn't solve this problem, because it calls * blkcipher_walk_done under the covers, which doesn't use walk->blocksize, * but instead uses this tfm->blocksize. */ -struct crypto_alg nx_gcm_aes_alg = { - .cra_name = "gcm(aes)", - .cra_driver_name = "gcm-aes-nx", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AEAD, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_type = &crypto_aead_type, - .cra_module = THIS_MODULE, - .cra_init = nx_crypto_ctx_aes_gcm_init, - .cra_exit = nx_crypto_ctx_exit, - .cra_aead = { - .ivsize = AES_BLOCK_SIZE, - .maxauthsize = AES_BLOCK_SIZE, - .setkey = gcm_aes_nx_set_key, - .setauthsize = gcm_aes_nx_setauthsize, - .encrypt = gcm_aes_nx_encrypt, - .decrypt = gcm_aes_nx_decrypt, - } +struct aead_alg nx_gcm_aes_alg = { + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-nx", + .cra_priority = 300, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct nx_crypto_ctx), + .cra_module = THIS_MODULE, + }, + .init = nx_crypto_ctx_aes_gcm_init, + .exit = nx_crypto_ctx_aead_exit, + .ivsize = 12, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = gcm_aes_nx_set_key, + .encrypt = gcm_aes_nx_encrypt, + .decrypt = gcm_aes_nx_decrypt, }; -struct crypto_alg nx_gcm4106_aes_alg = { - .cra_name = "rfc4106(gcm(aes))", - .cra_driver_name = "rfc4106-gcm-aes-nx", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AEAD, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct nx_crypto_ctx), - .cra_type = &crypto_nivaead_type, - .cra_module = THIS_MODULE, - .cra_init = nx_crypto_ctx_aes_gcm_init, - .cra_exit = nx_crypto_ctx_exit, - .cra_aead = { - .ivsize = 8, - .maxauthsize = AES_BLOCK_SIZE, - .geniv = "seqiv", - .setkey = gcm4106_aes_nx_set_key, - .setauthsize = gcm4106_aes_nx_setauthsize, - .encrypt = gcm4106_aes_nx_encrypt, - .decrypt = gcm4106_aes_nx_decrypt, - } +struct aead_alg nx_gcm4106_aes_alg = { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aes-nx", + .cra_priority = 300, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct nx_crypto_ctx), + .cra_module = THIS_MODULE, + }, + .init = nx_crypto_ctx_aes_gcm_init, + .exit = nx_crypto_ctx_aead_exit, + .ivsize = 8, + .maxauthsize = AES_BLOCK_SIZE, + .setkey = gcm4106_aes_nx_set_key, + .setauthsize = gcm4106_aes_nx_setauthsize, + .encrypt = gcm4106_aes_nx_encrypt, + .decrypt = gcm4106_aes_nx_decrypt, }; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 23621da..4e91bdb 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -33,8 +33,9 @@ static int nx_sha256_init(struct shash_desc *desc) { struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -44,15 +45,18 @@ static int nx_sha256_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *) sctx->state, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc) - goto out; + if (len != SHA256_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -64,7 +68,6 @@ static int nx_sha256_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be32(SHA256_H7); sctx->count = 0; -out: return 0; } @@ -74,10 +77,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -97,6 +102,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -108,25 +119,22 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, + max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA256); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); to_process = (data_len + buf_len); leftover = total - to_process; @@ -173,12 +181,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; unsigned long irq_flags; - int rc; + u32 max_sg_len; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count >= SHA256_BLOCK_SIZE) { @@ -195,25 +210,24 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8); len = sctx->count & (SHA256_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf, + &len, max_sg_len); - if (rc || len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) + if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, max_sg_len); - if (rc || len != SHA256_DIGEST_SIZE) + if (len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; goto out; + } + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; goto out; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index b3adf10..e6a58d2 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -32,8 +32,9 @@ static int nx_sha512_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -43,15 +44,18 @@ static int nx_sha512_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *)sctx->state, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc || len != SHA512_DIGEST_SIZE) - goto out; + if (len != SHA512_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -63,7 +67,6 @@ static int nx_sha512_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be64(SHA512_H7); sctx->count[0] = 0; -out: return 0; } @@ -73,10 +76,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -96,6 +101,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -108,25 +119,26 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA512); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc || data_len != (to_process - buf_len)) + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + + if (data_len != (to_process - buf_len)) { + rc = -EINVAL; goto out; + } to_process = (data_len + buf_len); leftover = total - to_process; @@ -172,13 +184,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; u64 count0; unsigned long irq_flags; - int rc; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count[0] >= SHA512_BLOCK_SIZE) { @@ -200,24 +219,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha512.message_bit_length_lo = count0; len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *)sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len, + max_sg_len); - if (rc || len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) + if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, + max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 1da6dc5..f6198f2 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -19,8 +19,8 @@ * Author: Kent Yoder <yoder1@us.ibm.com> */ +#include <crypto/internal/aead.h> #include <crypto/internal/hash.h> -#include <crypto/hash.h> #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/algapi.h> @@ -29,10 +29,10 @@ #include <linux/moduleparam.h> #include <linux/types.h> #include <linux/mm.h> -#include <linux/crypto.h> #include <linux/scatterlist.h> #include <linux/device.h> #include <linux/of.h> +#include <linux/types.h> #include <asm/hvcall.h> #include <asm/vio.h> @@ -215,8 +215,15 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @delta: is the amount we need to crop in order to bound the list. * */ -static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int delta) +static long int trim_sg_list(struct nx_sg *sg, + struct nx_sg *end, + unsigned int delta, + unsigned int *nbytes) { + long int oplen; + long int data_back; + unsigned int is_delta = delta; + while (delta && end > sg) { struct nx_sg *last = end - 1; @@ -228,54 +235,20 @@ static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int d delta -= last->len; } } - return (sg - end) * sizeof(struct nx_sg); -} -/** - * nx_sha_build_sg_list - walk and build sg list to sha modes - * using right bounds and limits. - * @nx_ctx: NX crypto context for the lists we're building - * @nx_sg: current sg list in or out list - * @op_len: current op_len to be used in order to build a sg list - * @nbytes: number or bytes to be processed - * @offset: buf offset - * @mode: SHA256 or SHA512 - */ -int nx_sha_build_sg_list(struct nx_crypto_ctx *nx_ctx, - struct nx_sg *nx_in_outsg, - s64 *op_len, - unsigned int *nbytes, - u8 *offset, - u32 mode) -{ - unsigned int delta = 0; - unsigned int total = *nbytes; - struct nx_sg *nx_insg = nx_in_outsg; - unsigned int max_sg_len; - - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); - - *nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen); - nx_insg = nx_build_sg_list(nx_insg, offset, nbytes, max_sg_len); - - switch (mode) { - case NX_DS_SHA256: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA256_BLOCK_SIZE - 1)); - break; - case NX_DS_SHA512: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA512_BLOCK_SIZE - 1)); - break; - default: - return -EINVAL; + /* There are cases where we need to crop list in order to make it + * a block size multiple, but we also need to align data. In order to + * that we need to calculate how much we need to put back to be + * processed + */ + oplen = (sg - end) * sizeof(struct nx_sg); + if (is_delta) { + data_back = (abs(oplen) / AES_BLOCK_SIZE) * sg->len; + data_back = *nbytes - (data_back & ~(AES_BLOCK_SIZE - 1)); + *nbytes -= data_back; } - *op_len = trim_sg_list(nx_in_outsg, nx_insg, delta); - return 0; + return oplen; } /** @@ -330,8 +303,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, /* these lengths should be negative, which will indicate to phyp that * the input and output parameters are scatterlists, not linear * buffers */ - nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta); - nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta); + nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta, nbytes); + nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta, nbytes); return 0; } @@ -426,6 +399,13 @@ static void nx_of_update_msc(struct device *dev, goto next_loop; } + if (!trip->sglen || trip->databytelen < NX_PAGE_SIZE) { + dev_warn(dev, "bogus sglen/databytelen: " + "%u/%u (ignored)\n", trip->sglen, + trip->databytelen); + goto next_loop; + } + switch (trip->keybitlen) { case 128: case 160: @@ -518,6 +498,72 @@ static void nx_of_init(struct device *dev, struct nx_of *props) nx_of_update_msc(dev, p, props); } +static bool nx_check_prop(struct device *dev, u32 fc, u32 mode, int slot) +{ + struct alg_props *props = &nx_driver.of.ap[fc][mode][slot]; + + if (!props->sglen || props->databytelen < NX_PAGE_SIZE) { + if (dev) + dev_warn(dev, "bogus sglen/databytelen for %u/%u/%u: " + "%u/%u (ignored)\n", fc, mode, slot, + props->sglen, props->databytelen); + return false; + } + + return true; +} + +static bool nx_check_props(struct device *dev, u32 fc, u32 mode) +{ + int i; + + for (i = 0; i < 3; i++) + if (!nx_check_prop(dev, fc, mode, i)) + return false; + + return true; +} + +static int nx_register_alg(struct crypto_alg *alg, u32 fc, u32 mode) +{ + return nx_check_props(&nx_driver.viodev->dev, fc, mode) ? + crypto_register_alg(alg) : 0; +} + +static int nx_register_aead(struct aead_alg *alg, u32 fc, u32 mode) +{ + return nx_check_props(&nx_driver.viodev->dev, fc, mode) ? + crypto_register_aead(alg) : 0; +} + +static int nx_register_shash(struct shash_alg *alg, u32 fc, u32 mode, int slot) +{ + return (slot >= 0 ? nx_check_prop(&nx_driver.viodev->dev, + fc, mode, slot) : + nx_check_props(&nx_driver.viodev->dev, fc, mode)) ? + crypto_register_shash(alg) : 0; +} + +static void nx_unregister_alg(struct crypto_alg *alg, u32 fc, u32 mode) +{ + if (nx_check_props(NULL, fc, mode)) + crypto_unregister_alg(alg); +} + +static void nx_unregister_aead(struct aead_alg *alg, u32 fc, u32 mode) +{ + if (nx_check_props(NULL, fc, mode)) + crypto_unregister_aead(alg); +} + +static void nx_unregister_shash(struct shash_alg *alg, u32 fc, u32 mode, + int slot) +{ + if (slot >= 0 ? nx_check_prop(NULL, fc, mode, slot) : + nx_check_props(NULL, fc, mode)) + crypto_unregister_shash(alg); +} + /** * nx_register_algs - register algorithms with the crypto API * @@ -542,72 +588,77 @@ static int nx_register_algs(void) nx_driver.of.status = NX_OKAY; - rc = crypto_register_alg(&nx_ecb_aes_alg); + rc = nx_register_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB); if (rc) goto out; - rc = crypto_register_alg(&nx_cbc_aes_alg); + rc = nx_register_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC); if (rc) goto out_unreg_ecb; - rc = crypto_register_alg(&nx_ctr_aes_alg); + rc = nx_register_alg(&nx_ctr_aes_alg, NX_FC_AES, NX_MODE_AES_CTR); if (rc) goto out_unreg_cbc; - rc = crypto_register_alg(&nx_ctr3686_aes_alg); + rc = nx_register_alg(&nx_ctr3686_aes_alg, NX_FC_AES, NX_MODE_AES_CTR); if (rc) goto out_unreg_ctr; - rc = crypto_register_alg(&nx_gcm_aes_alg); + rc = nx_register_aead(&nx_gcm_aes_alg, NX_FC_AES, NX_MODE_AES_GCM); if (rc) goto out_unreg_ctr3686; - rc = crypto_register_alg(&nx_gcm4106_aes_alg); + rc = nx_register_aead(&nx_gcm4106_aes_alg, NX_FC_AES, NX_MODE_AES_GCM); if (rc) goto out_unreg_gcm; - rc = crypto_register_alg(&nx_ccm_aes_alg); + rc = nx_register_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM); if (rc) goto out_unreg_gcm4106; - rc = crypto_register_alg(&nx_ccm4309_aes_alg); + rc = nx_register_alg(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM); if (rc) goto out_unreg_ccm; - rc = crypto_register_shash(&nx_shash_sha256_alg); + rc = nx_register_shash(&nx_shash_sha256_alg, NX_FC_SHA, NX_MODE_SHA, + NX_PROPS_SHA256); if (rc) goto out_unreg_ccm4309; - rc = crypto_register_shash(&nx_shash_sha512_alg); + rc = nx_register_shash(&nx_shash_sha512_alg, NX_FC_SHA, NX_MODE_SHA, + NX_PROPS_SHA512); if (rc) goto out_unreg_s256; - rc = crypto_register_shash(&nx_shash_aes_xcbc_alg); + rc = nx_register_shash(&nx_shash_aes_xcbc_alg, + NX_FC_AES, NX_MODE_AES_XCBC_MAC, -1); if (rc) goto out_unreg_s512; goto out; out_unreg_s512: - crypto_unregister_shash(&nx_shash_sha512_alg); + nx_unregister_shash(&nx_shash_sha512_alg, NX_FC_SHA, NX_MODE_SHA, + NX_PROPS_SHA512); out_unreg_s256: - crypto_unregister_shash(&nx_shash_sha256_alg); + nx_unregister_shash(&nx_shash_sha256_alg, NX_FC_SHA, NX_MODE_SHA, + NX_PROPS_SHA256); out_unreg_ccm4309: - crypto_unregister_alg(&nx_ccm4309_aes_alg); + nx_unregister_alg(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM); out_unreg_ccm: - crypto_unregister_alg(&nx_ccm_aes_alg); + nx_unregister_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM); out_unreg_gcm4106: - crypto_unregister_alg(&nx_gcm4106_aes_alg); + nx_unregister_aead(&nx_gcm4106_aes_alg, NX_FC_AES, NX_MODE_AES_GCM); out_unreg_gcm: - crypto_unregister_alg(&nx_gcm_aes_alg); + nx_unregister_aead(&nx_gcm_aes_alg, NX_FC_AES, NX_MODE_AES_GCM); out_unreg_ctr3686: - crypto_unregister_alg(&nx_ctr3686_aes_alg); + nx_unregister_alg(&nx_ctr3686_aes_alg, NX_FC_AES, NX_MODE_AES_CTR); out_unreg_ctr: - crypto_unregister_alg(&nx_ctr_aes_alg); + nx_unregister_alg(&nx_ctr_aes_alg, NX_FC_AES, NX_MODE_AES_CTR); out_unreg_cbc: - crypto_unregister_alg(&nx_cbc_aes_alg); + nx_unregister_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC); out_unreg_ecb: - crypto_unregister_alg(&nx_ecb_aes_alg); + nx_unregister_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB); out: return rc; } @@ -666,9 +717,9 @@ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm) NX_MODE_AES_CCM); } -int nx_crypto_ctx_aes_gcm_init(struct crypto_tfm *tfm) +int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm) { - return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES, + return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES, NX_MODE_AES_GCM); } @@ -720,6 +771,13 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm) nx_ctx->out_sg = NULL; } +void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm) +{ + struct nx_crypto_ctx *nx_ctx = crypto_aead_ctx(tfm); + + kzfree(nx_ctx->kmem); +} + static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id) { dev_dbg(&viodev->dev, "driver probed: %s resource id: 0x%x\n", @@ -746,17 +804,24 @@ static int nx_remove(struct vio_dev *viodev) if (nx_driver.of.status == NX_OKAY) { NX_DEBUGFS_FINI(&nx_driver); - crypto_unregister_alg(&nx_ccm_aes_alg); - crypto_unregister_alg(&nx_ccm4309_aes_alg); - crypto_unregister_alg(&nx_gcm_aes_alg); - crypto_unregister_alg(&nx_gcm4106_aes_alg); - crypto_unregister_alg(&nx_ctr_aes_alg); - crypto_unregister_alg(&nx_ctr3686_aes_alg); - crypto_unregister_alg(&nx_cbc_aes_alg); - crypto_unregister_alg(&nx_ecb_aes_alg); - crypto_unregister_shash(&nx_shash_sha256_alg); - crypto_unregister_shash(&nx_shash_sha512_alg); - crypto_unregister_shash(&nx_shash_aes_xcbc_alg); + nx_unregister_shash(&nx_shash_aes_xcbc_alg, + NX_FC_AES, NX_MODE_AES_XCBC_MAC, -1); + nx_unregister_shash(&nx_shash_sha512_alg, + NX_FC_SHA, NX_MODE_SHA, NX_PROPS_SHA256); + nx_unregister_shash(&nx_shash_sha256_alg, + NX_FC_SHA, NX_MODE_SHA, NX_PROPS_SHA512); + nx_unregister_alg(&nx_ccm4309_aes_alg, + NX_FC_AES, NX_MODE_AES_CCM); + nx_unregister_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM); + nx_unregister_aead(&nx_gcm4106_aes_alg, + NX_FC_AES, NX_MODE_AES_GCM); + nx_unregister_aead(&nx_gcm_aes_alg, + NX_FC_AES, NX_MODE_AES_GCM); + nx_unregister_alg(&nx_ctr3686_aes_alg, + NX_FC_AES, NX_MODE_AES_CTR); + nx_unregister_alg(&nx_ctr_aes_alg, NX_FC_AES, NX_MODE_AES_CTR); + nx_unregister_alg(&nx_cbc_aes_alg, NX_FC_AES, NX_MODE_AES_CBC); + nx_unregister_alg(&nx_ecb_aes_alg, NX_FC_AES, NX_MODE_AES_ECB); } return 0; diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 6c9ecaa..de3ea87 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -143,18 +143,17 @@ struct nx_crypto_ctx { /* prototypes */ int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm); -int nx_crypto_ctx_aes_gcm_init(struct crypto_tfm *tfm); +int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm); int nx_crypto_ctx_aes_xcbc_init(struct crypto_tfm *tfm); int nx_crypto_ctx_aes_ctr_init(struct crypto_tfm *tfm); int nx_crypto_ctx_aes_cbc_init(struct crypto_tfm *tfm); int nx_crypto_ctx_aes_ecb_init(struct crypto_tfm *tfm); int nx_crypto_ctx_sha_init(struct crypto_tfm *tfm); void nx_crypto_ctx_exit(struct crypto_tfm *tfm); +void nx_crypto_ctx_aead_exit(struct crypto_aead *tfm); void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function); int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op, u32 may_sleep); -int nx_sha_build_sg_list(struct nx_crypto_ctx *, struct nx_sg *, - s64 *, unsigned int *, u8 *, u32); struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32); int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *, struct scatterlist *, struct scatterlist *, unsigned int *, @@ -178,8 +177,8 @@ void nx_debugfs_fini(struct nx_crypto_driver *); extern struct crypto_alg nx_cbc_aes_alg; extern struct crypto_alg nx_ecb_aes_alg; -extern struct crypto_alg nx_gcm_aes_alg; -extern struct crypto_alg nx_gcm4106_aes_alg; +extern struct aead_alg nx_gcm_aes_alg; +extern struct aead_alg nx_gcm4106_aes_alg; extern struct crypto_alg nx_ctr_aes_alg; extern struct crypto_alg nx_ctr3686_aes_alg; extern struct crypto_alg nx_ccm_aes_alg; diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4d63e0d..b2024c95 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -362,7 +362,13 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req) static int omap_sham_hw_init(struct omap_sham_dev *dd) { - pm_runtime_get_sync(dd->dev); + int err; + + err = pm_runtime_get_sync(dd->dev); + if (err < 0) { + dev_err(dd->dev, "failed to get sync: %d\n", err); + return err; + } if (!test_bit(FLAGS_INIT, &dd->flags)) { set_bit(FLAGS_INIT, &dd->flags); @@ -1793,6 +1799,10 @@ static const struct of_device_id omap_sham_of_match[] = { .data = &omap_sham_pdata_omap2, }, { + .compatible = "ti,omap3-sham", + .data = &omap_sham_pdata_omap2, + }, + { .compatible = "ti,omap4-sham", .data = &omap_sham_pdata_omap4, }, @@ -1947,7 +1957,13 @@ static int omap_sham_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_irq_safe(dev); - pm_runtime_get_sync(dev); + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get sync: %d\n", err); + goto err_pm; + } + rev = omap_sham_read(dd, SHA_REG_REV(dd)); pm_runtime_put_sync(&pdev->dev); @@ -1977,6 +1993,7 @@ err_algs: for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) crypto_unregister_ahash( &dd->pdata->algs_info[i].algs_list[j]); +err_pm: pm_runtime_disable(dev); if (dd->dma_lch) dma_release_channel(dd->dma_lch); @@ -2019,7 +2036,11 @@ static int omap_sham_suspend(struct device *dev) static int omap_sham_resume(struct device *dev) { - pm_runtime_get_sync(dev); + int err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get sync: %d\n", err); + return err; + } return 0; } #endif diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 5da5b98..4f56f36 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/authenc.h> @@ -40,6 +40,7 @@ #include <linux/rtnetlink.h> #include <linux/scatterlist.h> #include <linux/sched.h> +#include <linux/sizes.h> #include <linux/slab.h> #include <linux/timer.h> @@ -261,18 +262,9 @@ static unsigned spacc_load_ctx(struct spacc_generic_ctx *ctx, } /* Count the number of scatterlist entries in a scatterlist. */ -static int sg_count(struct scatterlist *sg_list, int nbytes) +static inline int sg_count(struct scatterlist *sg_list, int nbytes) { - struct scatterlist *sg = sg_list; - int sg_nents = 0; - - while (nbytes > 0) { - ++sg_nents; - nbytes -= sg->length; - sg = sg_next(sg); - } - - return sg_nents; + return sg_nents_for_len(sg_list, nbytes); } static inline void ddt_set(struct spacc_ddt *ddt, dma_addr_t phys, size_t len) @@ -326,6 +318,7 @@ static int spacc_aead_make_ddts(struct spacc_req *req, u8 *giv) struct spacc_ddt *src_ddt, *dst_ddt; unsigned ivsize = crypto_aead_ivsize(crypto_aead_reqtfm(areq)); unsigned nents = sg_count(areq->src, areq->cryptlen); + unsigned total; dma_addr_t iv_addr; struct scatterlist *cur; int i, dst_ents, src_ents, assoc_ents; @@ -369,11 +362,18 @@ static int spacc_aead_make_ddts(struct spacc_req *req, u8 *giv) * Map the associated data. For decryption we don't copy the * associated data. */ + total = areq->assoclen; for_each_sg(areq->assoc, cur, assoc_ents, i) { - ddt_set(src_ddt++, sg_dma_address(cur), sg_dma_len(cur)); + unsigned len = sg_dma_len(cur); + + if (len > total) + len = total; + + total -= len; + + ddt_set(src_ddt++, sg_dma_address(cur), len); if (req->is_encrypt) - ddt_set(dst_ddt++, sg_dma_address(cur), - sg_dma_len(cur)); + ddt_set(dst_ddt++, sg_dma_address(cur), len); } ddt_set(src_ddt++, iv_addr, ivsize); @@ -790,7 +790,8 @@ static int spacc_aead_cra_init(struct crypto_tfm *tfm) get_random_bytes(ctx->salt, sizeof(ctx->salt)); - tfm->crt_aead.reqsize = sizeof(struct spacc_req); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct spacc_req)); return 0; } @@ -1754,15 +1755,15 @@ static int spacc_probe(struct platform_device *pdev) return PTR_ERR(engine->clk); } - if (clk_enable(engine->clk)) { - dev_info(&pdev->dev, "unable to enable clk\n"); + if (clk_prepare_enable(engine->clk)) { + dev_info(&pdev->dev, "unable to prepare/enable clk\n"); clk_put(engine->clk); return -EIO; } err = device_create_file(&pdev->dev, &dev_attr_stat_irq_thresh); if (err) { - clk_disable(engine->clk); + clk_disable_unprepare(engine->clk); clk_put(engine->clk); return err; } @@ -1830,7 +1831,7 @@ static int spacc_remove(struct platform_device *pdev) crypto_unregister_alg(&alg->alg); } - clk_disable(engine->clk); + clk_disable_unprepare(engine->clk); clk_put(engine->clk); return 0; diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 49bede2..6fdb9e8 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -2,9 +2,8 @@ config CRYPTO_DEV_QAT tristate select CRYPTO_AEAD select CRYPTO_AUTHENC - select CRYPTO_ALGAPI - select CRYPTO_AES - select CRYPTO_CBC + select CRYPTO_BLKCIPHER + select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 @@ -13,7 +12,6 @@ config CRYPTO_DEV_QAT config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" depends on X86 && PCI - default n select CRYPTO_DEV_QAT help Support for Intel(R) DH895xcc with Intel(R) QuickAssist Technology diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index f22ce71..5fe9029 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -48,7 +48,6 @@ #define ADF_ACCEL_DEVICES_H_ #include <linux/module.h> #include <linux/list.h> -#include <linux/proc_fs.h> #include <linux/io.h> #include "adf_cfg_common.h" diff --git a/drivers/crypto/qat/qat_common/adf_cfg_user.h b/drivers/crypto/qat/qat_common/adf_cfg_user.h index 0c38a15..ef5988a 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_user.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_user.h @@ -54,14 +54,6 @@ struct adf_user_cfg_key_val { char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; union { - char *user_val_ptr; - uint64_t padding1; - }; - union { - struct adf_user_cfg_key_val *prev; - uint64_t padding2; - }; - union { struct adf_user_cfg_key_val *next; uint64_t padding3; }; @@ -75,10 +67,6 @@ struct adf_user_cfg_section { uint64_t padding1; }; union { - struct adf_user_cfg_section *prev; - uint64_t padding2; - }; - union { struct adf_user_cfg_section *next; uint64_t padding3; }; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 0666ee6..27e16c0 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -53,6 +53,13 @@ #include "icp_qat_fw_loader_handle.h" #include "icp_qat_hal.h" +#define ADF_MAJOR_VERSION 0 +#define ADF_MINOR_VERSION 1 +#define ADF_BUILD_VERSION 3 +#define ADF_DRV_VERSION __stringify(ADF_MAJOR_VERSION) "." \ + __stringify(ADF_MINOR_VERSION) "." \ + __stringify(ADF_BUILD_VERSION) + #define ADF_STATUS_RESTARTING 0 #define ADF_STATUS_STARTING 1 #define ADF_STATUS_CONFIGURED 2 diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index cb5f066..e056b9e 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -504,3 +504,4 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_ALIAS_CRYPTO("intel_qat"); +MODULE_VERSION(ADF_DRV_VERSION); diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1dc5b0a..067402c 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -47,7 +47,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/crypto.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/hash.h> @@ -653,7 +653,7 @@ static void qat_alg_free_bufl(struct qat_crypto_instance *inst, } static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, - struct scatterlist *assoc, + struct scatterlist *assoc, int assoclen, struct scatterlist *sgl, struct scatterlist *sglout, uint8_t *iv, uint8_t ivlen, @@ -685,15 +685,21 @@ static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, for_each_sg(assoc, sg, assoc_n, i) { if (!sg->length) continue; - bufl->bufers[bufs].addr = dma_map_single(dev, - sg_virt(sg), - sg->length, - DMA_BIDIRECTIONAL); - bufl->bufers[bufs].len = sg->length; + + if (!(assoclen > 0)) + break; + + bufl->bufers[bufs].addr = + dma_map_single(dev, sg_virt(sg), + min_t(int, assoclen, sg->length), + DMA_BIDIRECTIONAL); + bufl->bufers[bufs].len = min_t(int, assoclen, sg->length); if (unlikely(dma_mapping_error(dev, bufl->bufers[bufs].addr))) goto err; bufs++; + assoclen -= sg->length; } + if (ivlen) { bufl->bufers[bufs].addr = dma_map_single(dev, iv, ivlen, DMA_BIDIRECTIONAL); @@ -845,8 +851,9 @@ static int qat_alg_aead_dec(struct aead_request *areq) int digst_size = crypto_aead_crt(aead_tfm)->authsize; int ret, ctr = 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->assoc, areq->src, areq->dst, - areq->iv, AES_BLOCK_SIZE, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, areq->assoc, areq->assoclen, + areq->src, areq->dst, areq->iv, + AES_BLOCK_SIZE, qat_req); if (unlikely(ret)) return ret; @@ -889,8 +896,9 @@ static int qat_alg_aead_enc_internal(struct aead_request *areq, uint8_t *iv, struct icp_qat_fw_la_bulk_req *msg; int ret, ctr = 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->assoc, areq->src, areq->dst, - iv, AES_BLOCK_SIZE, qat_req); + ret = qat_alg_sgl_to_bufl(ctx->inst, areq->assoc, areq->assoclen, + areq->src, areq->dst, iv, AES_BLOCK_SIZE, + qat_req); if (unlikely(ret)) return ret; @@ -1017,7 +1025,7 @@ static int qat_alg_ablkcipher_encrypt(struct ablkcipher_request *req) struct icp_qat_fw_la_bulk_req *msg; int ret, ctr = 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, NULL, req->src, req->dst, + ret = qat_alg_sgl_to_bufl(ctx->inst, NULL, 0, req->src, req->dst, NULL, 0, qat_req); if (unlikely(ret)) return ret; @@ -1055,7 +1063,7 @@ static int qat_alg_ablkcipher_decrypt(struct ablkcipher_request *req) struct icp_qat_fw_la_bulk_req *msg; int ret, ctr = 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, NULL, req->src, req->dst, + ret = qat_alg_sgl_to_bufl(ctx->inst, NULL, 0, req->src, req->dst, NULL, 0, qat_req); if (unlikely(ret)) return ret; @@ -1094,8 +1102,9 @@ static int qat_alg_aead_init(struct crypto_tfm *tfm, return -EFAULT; spin_lock_init(&ctx->lock); ctx->qat_hash_alg = hash; - tfm->crt_aead.reqsize = sizeof(struct aead_request) + - sizeof(struct qat_crypto_request); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_request) + + sizeof(struct qat_crypto_request)); ctx->tfm = tfm; return 0; } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 9decea2..1bde45b 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -300,6 +300,8 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err; + pcie_set_readrq(pdev, 1024); + /* enable PCI device */ if (pci_enable_device(pdev)) { ret = -EFAULT; @@ -417,5 +419,6 @@ module_exit(adfdrv_release); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); -MODULE_FIRMWARE("qat_895xcc.bin"); +MODULE_FIRMWARE(ADF_DH895XCC_FW); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); +MODULE_VERSION(ADF_DRV_VERSION); diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 6be377f..397a500 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -1578,8 +1578,12 @@ static int sahara_probe(struct platform_device *pdev) init_completion(&dev->dma_completion); - clk_prepare_enable(dev->clk_ipg); - clk_prepare_enable(dev->clk_ahb); + err = clk_prepare_enable(dev->clk_ipg); + if (err) + goto err_link; + err = clk_prepare_enable(dev->clk_ahb); + if (err) + goto clk_ipg_disable; version = sahara_read(dev, SAHARA_REG_VERSION); if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx27-sahara")) { @@ -1619,10 +1623,11 @@ err_algs: dma_free_coherent(&pdev->dev, SAHARA_MAX_HW_LINK * sizeof(struct sahara_hw_link), dev->hw_link[0], dev->hw_phys_link[0]); - clk_disable_unprepare(dev->clk_ipg); - clk_disable_unprepare(dev->clk_ahb); kthread_stop(dev->kthread); dev_ptr = NULL; + clk_disable_unprepare(dev->clk_ahb); +clk_ipg_disable: + clk_disable_unprepare(dev->clk_ipg); err_link: dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414a..83aca95 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -46,7 +46,7 @@ #include <crypto/des.h> #include <crypto/sha.h> #include <crypto/md5.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/skcipher.h> #include <crypto/hash.h> @@ -55,49 +55,92 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, + bool is_sec1) { - talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - talitos_ptr->eptr = upper_32_bits(dma_addr); + ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); + if (!is_sec1) + ptr->eptr = upper_32_bits(dma_addr); +} + +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len, + bool is_sec1) +{ + if (is_sec1) { + ptr->res = 0; + ptr->len1 = cpu_to_be16(len); + } else { + ptr->len = cpu_to_be16(len); + } +} + +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, + bool is_sec1) +{ + if (is_sec1) + return be16_to_cpu(ptr->len1); + else + return be16_to_cpu(ptr->len); +} + +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent = 0; } /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ static void map_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, - unsigned short len, void *data, - unsigned char extent, + struct talitos_ptr *ptr, + unsigned int len, void *data, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); - talitos_ptr->len = cpu_to_be16(len); - to_talitos_ptr(talitos_ptr, dma_addr); - talitos_ptr->j_extent = extent; + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr(ptr, dma_addr, is_sec1); + to_talitos_ptr_extent_clear(ptr, is_sec1); } /* * unmap bus single (contiguous) h/w descriptor pointer */ static void unmap_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, + struct talitos_ptr *ptr, enum dma_data_direction dir) { - dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr), - be16_to_cpu(talitos_ptr->len), dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), + from_talitos_ptr_len(ptr, is_sec1), dir); } static int reset_channel(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; + bool is_sec1 = has_ftr_sec1(priv); - setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET); + if (is_sec1) { + setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, + TALITOS1_CCCR_LO_RESET); - while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET) - && --timeout) - cpu_relax(); + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) & + TALITOS1_CCCR_LO_RESET) && --timeout) + cpu_relax(); + } else { + setbits32(priv->chan[ch].reg + TALITOS_CCCR, + TALITOS2_CCCR_RESET); + + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & + TALITOS2_CCCR_RESET) && --timeout) + cpu_relax(); + } if (timeout == 0) { dev_err(dev, "failed to reset channel %d\n", ch); @@ -120,11 +163,12 @@ static int reset_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - u32 mcr = TALITOS_MCR_SWR; + bool is_sec1 = has_ftr_sec1(priv); + u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR; setbits32(priv->reg + TALITOS_MCR, mcr); - while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR) + while ((in_be32(priv->reg + TALITOS_MCR) & mcr) && --timeout) cpu_relax(); @@ -148,6 +192,7 @@ static int init_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); int ch, err; + bool is_sec1 = has_ftr_sec1(priv); /* * Master reset @@ -171,12 +216,19 @@ static int init_device(struct device *dev) } /* enable channel done and error interrupts */ - setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT); - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); + if (is_sec1) { + clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT); + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); + /* disable parity error check in DEU (erroneous? test vect.) */ + setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE); + } else { + setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT); + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); + } /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) - setbits32(priv->reg + TALITOS_MDEUICR_LO, + setbits32(priv->reg_mdeu + TALITOS_EUICR_LO, TALITOS_MDEUICR_LO_ICE); return 0; @@ -204,6 +256,7 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, struct talitos_request *request; unsigned long flags; int head; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].head_lock, flags); @@ -217,8 +270,17 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, request = &priv->chan[ch].fifo[head]; /* map descriptor and save caller data */ - request->dma_desc = dma_map_single(dev, desc, sizeof(*desc), - DMA_BIDIRECTIONAL); + if (is_sec1) { + desc->hdr1 = desc->hdr; + desc->next_desc = 0; + request->dma_desc = dma_map_single(dev, &desc->hdr1, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } else { + request->dma_desc = dma_map_single(dev, desc, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } request->callback = callback; request->context = context; @@ -250,16 +312,21 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) struct talitos_request *request, saved_req; unsigned long flags; int tail, status; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].tail_lock, flags); tail = priv->chan[ch].tail; while (priv->chan[ch].fifo[tail].desc) { + __be32 hdr; + request = &priv->chan[ch].fifo[tail]; /* descriptors with their done bits set don't get the error */ rmb(); - if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE) + hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr; + + if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE) status = 0; else if (!error) @@ -268,7 +335,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, - sizeof(struct talitos_desc), + TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ @@ -302,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) /* * process completed requests for channels that have done status */ -#define DEF_TALITOS_DONE(name, ch_done_mask) \ -static void talitos_done_##name(unsigned long data) \ +#define DEF_TALITOS1_DONE(name, ch_done_mask) \ +static void talitos1_done_##name(unsigned long data) \ +{ \ + struct device *dev = (struct device *)data; \ + struct talitos_private *priv = dev_get_drvdata(dev); \ + unsigned long flags; \ + \ + if (ch_done_mask & 0x10000000) \ + flush_channel(dev, 0, 0, 0); \ + if (priv->num_channels == 1) \ + goto out; \ + if (ch_done_mask & 0x40000000) \ + flush_channel(dev, 1, 0, 0); \ + if (ch_done_mask & 0x00010000) \ + flush_channel(dev, 2, 0, 0); \ + if (ch_done_mask & 0x00040000) \ + flush_channel(dev, 3, 0, 0); \ + \ +out: \ + /* At this point, all completed channels have been processed */ \ + /* Unmask done interrupts for channels completed later on. */ \ + spin_lock_irqsave(&priv->reg_lock, flags); \ + clrbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ +} + +DEF_TALITOS1_DONE(4ch, TALITOS1_ISR_4CHDONE) + +#define DEF_TALITOS2_DONE(name, ch_done_mask) \ +static void talitos2_done_##name(unsigned long data) \ { \ struct device *dev = (struct device *)data; \ struct talitos_private *priv = dev_get_drvdata(dev); \ @@ -325,12 +421,13 @@ out: \ /* Unmask done interrupts for channels completed later on. */ \ spin_lock_irqsave(&priv->reg_lock, flags); \ setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); \ + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); \ spin_unlock_irqrestore(&priv->reg_lock, flags); \ } -DEF_TALITOS_DONE(4ch, TALITOS_ISR_4CHDONE) -DEF_TALITOS_DONE(ch0_2, TALITOS_ISR_CH_0_2_DONE) -DEF_TALITOS_DONE(ch1_3, TALITOS_ISR_CH_1_3_DONE) + +DEF_TALITOS2_DONE(4ch, TALITOS2_ISR_4CHDONE) +DEF_TALITOS2_DONE(ch0_2, TALITOS2_ISR_CH_0_2_DONE) +DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE) /* * locate current (offending) descriptor @@ -377,44 +474,44 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: dev_err(dev, "AFEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AFEUISR), - in_be32(priv->reg + TALITOS_AFEUISR_LO)); + in_be32(priv->reg_afeu + TALITOS_EUISR), + in_be32(priv->reg_afeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_DEU: dev_err(dev, "DEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_DEUISR), - in_be32(priv->reg + TALITOS_DEUISR_LO)); + in_be32(priv->reg_deu + TALITOS_EUISR), + in_be32(priv->reg_deu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_MDEUA: case DESC_HDR_SEL0_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_RNG: dev_err(dev, "RNGUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_RNGUISR), - in_be32(priv->reg + TALITOS_RNGUISR_LO)); + in_be32(priv->reg_rngu + TALITOS_ISR), + in_be32(priv->reg_rngu + TALITOS_ISR_LO)); break; case DESC_HDR_SEL0_PKEU: dev_err(dev, "PKEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_PKEUISR), - in_be32(priv->reg + TALITOS_PKEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_AESU: dev_err(dev, "AESUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AESUISR), - in_be32(priv->reg + TALITOS_AESUISR_LO)); + in_be32(priv->reg_aesu + TALITOS_EUISR), + in_be32(priv->reg_aesu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_KEU: dev_err(dev, "KEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_KEUISR), - in_be32(priv->reg + TALITOS_KEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; } @@ -422,13 +519,13 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) case DESC_HDR_SEL1_MDEUA: case DESC_HDR_SEL1_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL1_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; } @@ -445,17 +542,24 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - int ch, error, reset_dev = 0, reset_ch = 0; - u32 v, v_lo; + int ch, error, reset_dev = 0; + u32 v_lo; + bool is_sec1 = has_ftr_sec1(priv); + int reset_ch = is_sec1 ? 1 : 0; /* only SEC2 supports continuation */ for (ch = 0; ch < priv->num_channels; ch++) { /* skip channels without errors */ - if (!(isr & (1 << (ch * 2 + 1)))) - continue; + if (is_sec1) { + /* bits 29, 31, 17, 19 */ + if (!(isr & (1 << (29 + (ch & 1) * 2 - (ch & 2) * 6)))) + continue; + } else { + if (!(isr & (1 << (ch * 2 + 1)))) + continue; + } error = -EINVAL; - v = in_be32(priv->chan[ch].reg + TALITOS_CCPSR); v_lo = in_be32(priv->chan[ch].reg + TALITOS_CCPSR_LO); if (v_lo & TALITOS_CCPSR_LO_DOF) { @@ -471,23 +575,28 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) if (v_lo & TALITOS_CCPSR_LO_MDTE) dev_err(dev, "master data transfer error\n"); if (v_lo & TALITOS_CCPSR_LO_SGDLZ) - dev_err(dev, "s/g data length zero error\n"); + dev_err(dev, is_sec1 ? "pointeur not complete error\n" + : "s/g data length zero error\n"); if (v_lo & TALITOS_CCPSR_LO_FPZ) - dev_err(dev, "fetch pointer zero error\n"); + dev_err(dev, is_sec1 ? "parity error\n" + : "fetch pointer zero error\n"); if (v_lo & TALITOS_CCPSR_LO_IDH) dev_err(dev, "illegal descriptor header error\n"); if (v_lo & TALITOS_CCPSR_LO_IEU) - dev_err(dev, "invalid execution unit error\n"); + dev_err(dev, is_sec1 ? "static assignment error\n" + : "invalid exec unit error\n"); if (v_lo & TALITOS_CCPSR_LO_EU) report_eu_error(dev, ch, current_desc_hdr(dev, ch)); - if (v_lo & TALITOS_CCPSR_LO_GB) - dev_err(dev, "gather boundary error\n"); - if (v_lo & TALITOS_CCPSR_LO_GRL) - dev_err(dev, "gather return/length error\n"); - if (v_lo & TALITOS_CCPSR_LO_SB) - dev_err(dev, "scatter boundary error\n"); - if (v_lo & TALITOS_CCPSR_LO_SRL) - dev_err(dev, "scatter return/length error\n"); + if (!is_sec1) { + if (v_lo & TALITOS_CCPSR_LO_GB) + dev_err(dev, "gather boundary error\n"); + if (v_lo & TALITOS_CCPSR_LO_GRL) + dev_err(dev, "gather return/length error\n"); + if (v_lo & TALITOS_CCPSR_LO_SB) + dev_err(dev, "scatter boundary error\n"); + if (v_lo & TALITOS_CCPSR_LO_SRL) + dev_err(dev, "scatter return/length error\n"); + } flush_channel(dev, ch, error, reset_ch); @@ -495,10 +604,10 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) reset_channel(dev, ch); } else { setbits32(priv->chan[ch].reg + TALITOS_CCCR, - TALITOS_CCCR_CONT); + TALITOS2_CCCR_CONT); setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, 0); while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & - TALITOS_CCCR_CONT) && --timeout) + TALITOS2_CCCR_CONT) && --timeout) cpu_relax(); if (timeout == 0) { dev_err(dev, "failed to restart channel %d\n", @@ -507,9 +616,14 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) } } } - if (reset_dev || isr & ~TALITOS_ISR_4CHERR || isr_lo) { - dev_err(dev, "done overflow, internal time out, or rngu error: " - "ISR 0x%08x_%08x\n", isr, isr_lo); + if (reset_dev || (is_sec1 && isr & ~TALITOS1_ISR_4CHERR) || + (!is_sec1 && isr & ~TALITOS2_ISR_4CHERR) || isr_lo) { + if (is_sec1 && (isr_lo & TALITOS1_ISR_TEA_ERR)) + dev_err(dev, "TEA error: ISR 0x%08x_%08x\n", + isr, isr_lo); + else + dev_err(dev, "done overflow, internal time out, or " + "rngu error: ISR 0x%08x_%08x\n", isr, isr_lo); /* purge request queues */ for (ch = 0; ch < priv->num_channels; ch++) @@ -520,8 +634,43 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) } } -#define DEF_TALITOS_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ -static irqreturn_t talitos_interrupt_##name(int irq, void *data) \ +#define DEF_TALITOS1_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ +static irqreturn_t talitos1_interrupt_##name(int irq, void *data) \ +{ \ + struct device *dev = data; \ + struct talitos_private *priv = dev_get_drvdata(dev); \ + u32 isr, isr_lo; \ + unsigned long flags; \ + \ + spin_lock_irqsave(&priv->reg_lock, flags); \ + isr = in_be32(priv->reg + TALITOS_ISR); \ + isr_lo = in_be32(priv->reg + TALITOS_ISR_LO); \ + /* Acknowledge interrupt */ \ + out_be32(priv->reg + TALITOS_ICR, isr & (ch_done_mask | ch_err_mask)); \ + out_be32(priv->reg + TALITOS_ICR_LO, isr_lo); \ + \ + if (unlikely(isr & ch_err_mask || isr_lo & TALITOS1_IMR_LO_INIT)) { \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ + talitos_error(dev, isr & ch_err_mask, isr_lo); \ + } \ + else { \ + if (likely(isr & ch_done_mask)) { \ + /* mask further done interrupts. */ \ + setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ + /* done_task will unmask done interrupts at exit */ \ + tasklet_schedule(&priv->done_task[tlet]); \ + } \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ + } \ + \ + return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED : \ + IRQ_NONE; \ +} + +DEF_TALITOS1_INTERRUPT(4ch, TALITOS1_ISR_4CHDONE, TALITOS1_ISR_4CHERR, 0) + +#define DEF_TALITOS2_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ +static irqreturn_t talitos2_interrupt_##name(int irq, void *data) \ { \ struct device *dev = data; \ struct talitos_private *priv = dev_get_drvdata(dev); \ @@ -552,9 +701,12 @@ static irqreturn_t talitos_interrupt_##name(int irq, void *data) \ return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED : \ IRQ_NONE; \ } -DEF_TALITOS_INTERRUPT(4ch, TALITOS_ISR_4CHDONE, TALITOS_ISR_4CHERR, 0) -DEF_TALITOS_INTERRUPT(ch0_2, TALITOS_ISR_CH_0_2_DONE, TALITOS_ISR_CH_0_2_ERR, 0) -DEF_TALITOS_INTERRUPT(ch1_3, TALITOS_ISR_CH_1_3_DONE, TALITOS_ISR_CH_1_3_ERR, 1) + +DEF_TALITOS2_INTERRUPT(4ch, TALITOS2_ISR_4CHDONE, TALITOS2_ISR_4CHERR, 0) +DEF_TALITOS2_INTERRUPT(ch0_2, TALITOS2_ISR_CH_0_2_DONE, TALITOS2_ISR_CH_0_2_ERR, + 0) +DEF_TALITOS2_INTERRUPT(ch1_3, TALITOS2_ISR_CH_1_3_DONE, TALITOS2_ISR_CH_1_3_ERR, + 1) /* * hwrng @@ -567,7 +719,7 @@ static int talitos_rng_data_present(struct hwrng *rng, int wait) int i; for (i = 0; i < 20; i++) { - ofl = in_be32(priv->reg + TALITOS_RNGUSR_LO) & + ofl = in_be32(priv->reg_rngu + TALITOS_EUSR_LO) & TALITOS_RNGUSR_LO_OFL; if (ofl || !wait) break; @@ -583,8 +735,8 @@ static int talitos_rng_data_read(struct hwrng *rng, u32 *data) struct talitos_private *priv = dev_get_drvdata(dev); /* rng fifo requires 64-bit accesses */ - *data = in_be32(priv->reg + TALITOS_RNGU_FIFO); - *data = in_be32(priv->reg + TALITOS_RNGU_FIFO_LO); + *data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO); + *data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO_LO); return sizeof(u32); } @@ -595,8 +747,9 @@ static int talitos_rng_init(struct hwrng *rng) struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - setbits32(priv->reg + TALITOS_RNGURCR_LO, TALITOS_RNGURCR_LO_SR); - while (!(in_be32(priv->reg + TALITOS_RNGUSR_LO) & TALITOS_RNGUSR_LO_RD) + setbits32(priv->reg_rngu + TALITOS_EURCR_LO, TALITOS_RNGURCR_LO_SR); + while (!(in_be32(priv->reg_rngu + TALITOS_EUSR_LO) + & TALITOS_RNGUSR_LO_RD) && --timeout) cpu_relax(); if (timeout == 0) { @@ -605,7 +758,7 @@ static int talitos_rng_init(struct hwrng *rng) } /* start generating */ - setbits32(priv->reg + TALITOS_RNGUDSR_LO, 0); + setbits32(priv->reg_rngu + TALITOS_EUDSR_LO, 0); return 0; } @@ -661,7 +814,7 @@ struct talitos_ahash_req_ctx { unsigned int first; unsigned int last; unsigned int to_hash_later; - u64 nbuf; + unsigned int nbuf; struct scatterlist bufsl[2]; struct scatterlist *psrc; }; @@ -712,9 +865,10 @@ badkey: * @dst_chained: whether dst is chained or not * @iv_dma: dma address of iv for checking continuity and link table * @dma_len: length of dma mapped link_tbl space - * @dma_link_tbl: bus physical address of link_tbl + * @dma_link_tbl: bus physical address of link_tbl/buf * @desc: h/w descriptor - * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) + * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2) + * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1) * * if decrypting (with authcheck), or either one of src_nents or dst_nents * is greater than 1, an integrity check value is concatenated to the end @@ -731,7 +885,10 @@ struct talitos_edesc { int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; - struct talitos_ptr link_tbl[0]; + union { + struct talitos_ptr link_tbl[0]; + u8 buf[0]; + }; }; static int talitos_map_sg(struct device *dev, struct scatterlist *sg, @@ -907,8 +1064,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, { int n_sg = sg_count; - while (n_sg--) { - to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg)); + while (sg && n_sg--) { + to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0); link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg)); link_tbl_ptr->j_extent = 0; link_tbl_ptr++; @@ -925,7 +1082,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - be16_add_cpu(&link_tbl_ptr->len, cryptlen); + link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) + + cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -953,7 +1111,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, - 0, DMA_TO_DEVICE); + DMA_TO_DEVICE); /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize); @@ -962,7 +1120,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; /* assoc_nents - 1 entries for assoc, 1 for IV */ @@ -973,7 +1131,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_ptr += sg_count - 1; tbl_ptr->j_extent = 0; tbl_ptr++; - to_talitos_ptr(tbl_ptr, edesc->iv_dma); + to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0); tbl_ptr->len = cpu_to_be16(ivsize); tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -982,14 +1140,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } else { if (areq->assoclen) to_talitos_ptr(&desc->ptr[1], - sg_dma_address(areq->assoc)); + sg_dma_address(areq->assoc), 0); else - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); + to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, 0); desc->ptr[1].j_extent = 0; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma); + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); desc->ptr[2].len = cpu_to_be16(ivsize); desc->ptr[2].j_extent = 0; /* Sync needed for the aead_givencrypt case */ @@ -997,7 +1155,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, 0, + (char *)&ctx->key + ctx->authkeylen, DMA_TO_DEVICE); /* @@ -1015,7 +1173,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->src_chained); if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src)); + to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0); } else { sg_link_tbl_len = cryptlen; @@ -1026,14 +1184,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, &edesc->link_tbl[0]); if (sg_count > 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl); + to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl, 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); } else { /* Only one segment now, so no link tbl needed */ to_talitos_ptr(&desc->ptr[4], - sg_dma_address(areq->src)); + sg_dma_address(areq->src), 0); } } @@ -1047,13 +1205,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, DMA_FROM_DEVICE, edesc->dst_chained); if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst)); + to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0); } else { int tbl_off = edesc->src_nents + 1; struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr)); + tbl_off * sizeof(struct talitos_ptr), 0); sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, tbl_ptr); @@ -1068,14 +1226,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + (tbl_off + edesc->dst_nents + 1 + edesc->assoc_nents) * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, DMA_FROM_DEVICE); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); @@ -1095,7 +1253,7 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, bool *chained) int sg_nents = 0; *chained = false; - while (nbytes > 0) { + while (nbytes > 0 && sg) { sg_nents++; nbytes -= sg->length; if (!sg_is_last(sg) && (sg + 1)->length == 0) @@ -1128,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { + if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } @@ -1173,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, */ alloc_len = sizeof(struct talitos_edesc); if (assoc_nents || src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 2 + assoc_nents) * - sizeof(struct talitos_ptr) + authsize; + if (is_sec1) + dma_len = (src_nents ? cryptlen : 0) + + (dst_nents ? cryptlen : 0); + else + dma_len = (src_nents + dst_nents + 2 + assoc_nents) * + sizeof(struct talitos_ptr) + authsize; alloc_len += dma_len; } else { dma_len = 0; @@ -1327,16 +1492,43 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, + struct scatterlist *dst, unsigned int len, + struct talitos_edesc *edesc) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, + dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } +} + static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1358,6 +1550,102 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + sg_count = edesc->src_nents ? : 1; + + if (sg_count == 1) { + dma_map_sg(dev, src, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_copy_to_buffer(src, sg_count, edesc->buf, len); + to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + len, DMA_TO_DEVICE); + } + } else { + to_talitos_ptr_extent_clear(ptr, is_sec1); + + sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, + edesc->src_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + dma_sync_single_for_device(dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed*/ + to_talitos_ptr(ptr, sg_dma_address(src), + is_sec1); + } + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (dir != DMA_NONE) + sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, + dir, edesc->dst_chained); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + if (sg_count == 1) { + if (dir != DMA_NONE) + dma_map_sg(dev, dst, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + } + } else { + to_talitos_ptr_extent_clear(ptr, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr), 0); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } + } +} + static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1371,83 +1659,41 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* first DWORD empty */ - desc->ptr[0].len = 0; - to_talitos_ptr(&desc->ptr[0], 0); - desc->ptr[0].j_extent = 0; + desc->ptr[0] = zero_entry; /* cipher iv */ - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); - desc->ptr[1].len = cpu_to_be16(ivsize); - desc->ptr[1].j_extent = 0; + to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); + to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); /* * cipher in */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[3], sg_dma_address(areq->src)); - } else { - sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - to_talitos_ptr(&desc->ptr[3], - sg_dma_address(areq->src)); - } - } + sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + &desc->ptr[3]); /* cipher out */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = 0; - - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? : 1, - DMA_FROM_DEVICE, edesc->dst_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->dst)); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr)); - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, - link_tbl_ptr); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); - } + map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, + (areq->src == areq->dst) ? DMA_NONE + : DMA_FROM_DEVICE, + &desc->ptr[4], sg_count); /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, DMA_FROM_DEVICE); /* last DWORD empty */ - desc->ptr[6].len = 0; - to_talitos_ptr(&desc->ptr[6], 0); - desc->ptr[6].j_extent = 0; + desc->ptr[6] = zero_entry; ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { @@ -1507,20 +1753,22 @@ static void common_nonsnoop_hash_unmap(struct device *dev, struct ahash_request *areq) { struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + /* When using hashctx-in, must unmap it. */ - if (edesc->desc.ptr[1].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - if (edesc->desc.ptr[2].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[2], is_sec1)) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1548,6 +1796,27 @@ static void ahash_done(struct device *dev, areq->base.complete(&areq->base, err); } +/* + * SEC1 doesn't like hashing of 0 sized message, so we do the padding + * ourself and submit a padded block + */ +void talitos_handle_buggy_hash(struct talitos_ctx *ctx, + struct talitos_edesc *edesc, + struct talitos_ptr *ptr) +{ + static u8 padded_hash[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + pr_err_once("Bug in SEC1, padding ourself\n"); + edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD; + map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash), + (char *)padded_hash, DMA_TO_DEVICE); +} + static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct ahash_request *areq, unsigned int length, void (*callback) (struct device *dev, @@ -1559,7 +1828,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; - int sg_count, ret; + int ret; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1568,7 +1839,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (!req_ctx->first || req_ctx->swinit) { map_single_talitos_ptr(dev, &desc->ptr[1], req_ctx->hw_context_size, - (char *)req_ctx->hw_context, 0, + (char *)req_ctx->hw_context, DMA_TO_DEVICE); req_ctx->swinit = 0; } else { @@ -1580,38 +1851,15 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* HMAC key */ if (ctx->keylen) map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); else desc->ptr[2] = zero_entry; /* * data in */ - desc->ptr[3].len = cpu_to_be16(length); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, req_ctx->psrc, - edesc->src_nents ? : 1, - DMA_TO_DEVICE, edesc->src_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc)); - } else { - sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length, - &edesc->link_tbl[0]); - if (sg_count > 1) { - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); - dma_sync_single_for_device(ctx->dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - to_talitos_ptr(&desc->ptr[3], - sg_dma_address(req_ctx->psrc)); - } - } + map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, + DMA_TO_DEVICE, &desc->ptr[3]); /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1620,15 +1868,18 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (req_ctx->last) map_single_talitos_ptr(dev, &desc->ptr[5], crypto_ahash_digestsize(tfm), - areq->result, 0, DMA_FROM_DEVICE); + areq->result, DMA_FROM_DEVICE); else map_single_talitos_ptr(dev, &desc->ptr[5], req_ctx->hw_context_size, - req_ctx->hw_context, 0, DMA_FROM_DEVICE); + req_ctx->hw_context, DMA_FROM_DEVICE); /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) + talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); @@ -2561,6 +2812,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, break; default: dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type); + kfree(t_alg); return ERR_PTR(-EINVAL); } @@ -2581,29 +2833,35 @@ static int talitos_probe_irq(struct platform_device *ofdev) struct device_node *np = ofdev->dev.of_node; struct talitos_private *priv = dev_get_drvdata(dev); int err; + bool is_sec1 = has_ftr_sec1(priv); priv->irq[0] = irq_of_parse_and_map(np, 0); if (!priv->irq[0]) { dev_err(dev, "failed to map irq\n"); return -EINVAL; } + if (is_sec1) { + err = request_irq(priv->irq[0], talitos1_interrupt_4ch, 0, + dev_driver_string(dev), dev); + goto primary_out; + } priv->irq[1] = irq_of_parse_and_map(np, 1); /* get the primary irq line */ if (!priv->irq[1]) { - err = request_irq(priv->irq[0], talitos_interrupt_4ch, 0, + err = request_irq(priv->irq[0], talitos2_interrupt_4ch, 0, dev_driver_string(dev), dev); goto primary_out; } - err = request_irq(priv->irq[0], talitos_interrupt_ch0_2, 0, + err = request_irq(priv->irq[0], talitos2_interrupt_ch0_2, 0, dev_driver_string(dev), dev); if (err) goto primary_out; /* get the secondary irq line */ - err = request_irq(priv->irq[1], talitos_interrupt_ch1_3, 0, + err = request_irq(priv->irq[1], talitos2_interrupt_ch1_3, 0, dev_driver_string(dev), dev); if (err) { dev_err(dev, "failed to request secondary irq\n"); @@ -2630,6 +2888,7 @@ static int talitos_probe(struct platform_device *ofdev) struct talitos_private *priv; const unsigned int *prop; int i, err; + int stride; priv = kzalloc(sizeof(struct talitos_private), GFP_KERNEL); if (!priv) @@ -2643,20 +2902,6 @@ static int talitos_probe(struct platform_device *ofdev) spin_lock_init(&priv->reg_lock); - err = talitos_probe_irq(ofdev); - if (err) - goto err_out; - - if (!priv->irq[1]) { - tasklet_init(&priv->done_task[0], talitos_done_4ch, - (unsigned long)dev); - } else { - tasklet_init(&priv->done_task[0], talitos_done_ch0_2, - (unsigned long)dev); - tasklet_init(&priv->done_task[1], talitos_done_ch1_3, - (unsigned long)dev); - } - priv->reg = of_iomap(np, 0); if (!priv->reg) { dev_err(dev, "failed to of_iomap\n"); @@ -2696,6 +2941,53 @@ static int talitos_probe(struct platform_device *ofdev) TALITOS_FTR_SHA224_HWINIT | TALITOS_FTR_HMAC_OK; + if (of_device_is_compatible(np, "fsl,sec1.0")) + priv->features |= TALITOS_FTR_SEC1; + + if (of_device_is_compatible(np, "fsl,sec1.2")) { + priv->reg_deu = priv->reg + TALITOS12_DEU; + priv->reg_aesu = priv->reg + TALITOS12_AESU; + priv->reg_mdeu = priv->reg + TALITOS12_MDEU; + stride = TALITOS1_CH_STRIDE; + } else if (of_device_is_compatible(np, "fsl,sec1.0")) { + priv->reg_deu = priv->reg + TALITOS10_DEU; + priv->reg_aesu = priv->reg + TALITOS10_AESU; + priv->reg_mdeu = priv->reg + TALITOS10_MDEU; + priv->reg_afeu = priv->reg + TALITOS10_AFEU; + priv->reg_rngu = priv->reg + TALITOS10_RNGU; + priv->reg_pkeu = priv->reg + TALITOS10_PKEU; + stride = TALITOS1_CH_STRIDE; + } else { + priv->reg_deu = priv->reg + TALITOS2_DEU; + priv->reg_aesu = priv->reg + TALITOS2_AESU; + priv->reg_mdeu = priv->reg + TALITOS2_MDEU; + priv->reg_afeu = priv->reg + TALITOS2_AFEU; + priv->reg_rngu = priv->reg + TALITOS2_RNGU; + priv->reg_pkeu = priv->reg + TALITOS2_PKEU; + priv->reg_keu = priv->reg + TALITOS2_KEU; + priv->reg_crcu = priv->reg + TALITOS2_CRCU; + stride = TALITOS2_CH_STRIDE; + } + + err = talitos_probe_irq(ofdev); + if (err) + goto err_out; + + if (of_device_is_compatible(np, "fsl,sec1.0")) { + tasklet_init(&priv->done_task[0], talitos1_done_4ch, + (unsigned long)dev); + } else { + if (!priv->irq[1]) { + tasklet_init(&priv->done_task[0], talitos2_done_4ch, + (unsigned long)dev); + } else { + tasklet_init(&priv->done_task[0], talitos2_done_ch0_2, + (unsigned long)dev); + tasklet_init(&priv->done_task[1], talitos2_done_ch1_3, + (unsigned long)dev); + } + } + priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); if (!priv->chan) { @@ -2707,7 +2999,7 @@ static int talitos_probe(struct platform_device *ofdev) priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); for (i = 0; i < priv->num_channels; i++) { - priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); + priv->chan[i].reg = priv->reg + stride * (i + 1); if (!priv->irq[1] || !(i & 1)) priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; @@ -2794,9 +3086,16 @@ err_out: } static const struct of_device_id talitos_match[] = { +#ifdef CONFIG_CRYPTO_DEV_TALITOS1 + { + .compatible = "fsl,sec1.0", + }, +#endif +#ifdef CONFIG_CRYPTO_DEV_TALITOS2 { .compatible = "fsl,sec2.0", }, +#endif {}, }; MODULE_DEVICE_TABLE(of, talitos_match); diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 61a1405..314daf5 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -29,7 +29,8 @@ */ #define TALITOS_TIMEOUT 100000 -#define TALITOS_MAX_DATA_LEN 65535 +#define TALITOS1_MAX_DATA_LEN 32768 +#define TALITOS2_MAX_DATA_LEN 65535 #define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f) #define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf) @@ -37,9 +38,17 @@ /* descriptor pointer entry */ struct talitos_ptr { - __be16 len; /* length */ - u8 j_extent; /* jump to sg link table and/or extent */ - u8 eptr; /* extended address */ + union { + struct { /* SEC2 format */ + __be16 len; /* length */ + u8 j_extent; /* jump to sg link table and/or extent*/ + u8 eptr; /* extended address */ + }; + struct { /* SEC1 format */ + __be16 res; + __be16 len1; /* length */ + }; + }; __be32 ptr; /* address */ }; @@ -53,10 +62,16 @@ static const struct talitos_ptr zero_entry = { /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ - __be32 hdr_lo; /* header low bits */ + union { + __be32 hdr_lo; /* header low bits */ + __be32 hdr1; /* header for SEC1 */ + }; struct talitos_ptr ptr[7]; /* ptr/len pair array */ + __be32 next_desc; /* next descriptor (SEC1) */ }; +#define TALITOS_DESC_SIZE (sizeof(struct talitos_desc) - sizeof(__be32)) + /** * talitos_request - descriptor submission request * @desc: descriptor pointer (kernel virtual) @@ -97,6 +112,14 @@ struct talitos_private { struct device *dev; struct platform_device *ofdev; void __iomem *reg; + void __iomem *reg_deu; + void __iomem *reg_aesu; + void __iomem *reg_mdeu; + void __iomem *reg_afeu; + void __iomem *reg_rngu; + void __iomem *reg_pkeu; + void __iomem *reg_keu; + void __iomem *reg_crcu; int irq[2]; /* SEC global registers lock */ @@ -144,49 +167,80 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 #define TALITOS_FTR_SHA224_HWINIT 0x00000004 #define TALITOS_FTR_HMAC_OK 0x00000008 +#define TALITOS_FTR_SEC1 0x00000010 + +/* + * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are + * defined, we check the features which are set according to the device tree. + * Otherwise, we answer true or false directly + */ +static inline bool has_ftr_sec1(struct talitos_private *priv) +{ +#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2) + return priv->features & TALITOS_FTR_SEC1 ? true : false; +#elif defined(CONFIG_CRYPTO_DEV_TALITOS1) + return true; +#else + return false; +#endif +} /* * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register */ +#define ISR1_FORMAT(x) (((x) << 28) | ((x) << 16)) +#define ISR2_FORMAT(x) (((x) << 4) | (x)) + /* global register offset addresses */ #define TALITOS_MCR 0x1030 /* master control register */ #define TALITOS_MCR_RCA0 (1 << 15) /* remap channel 0 */ #define TALITOS_MCR_RCA1 (1 << 14) /* remap channel 1 */ #define TALITOS_MCR_RCA2 (1 << 13) /* remap channel 2 */ #define TALITOS_MCR_RCA3 (1 << 12) /* remap channel 3 */ -#define TALITOS_MCR_SWR 0x1 /* s/w reset */ +#define TALITOS1_MCR_SWR 0x1000000 /* s/w reset */ +#define TALITOS2_MCR_SWR 0x1 /* s/w reset */ #define TALITOS_MCR_LO 0x1034 #define TALITOS_IMR 0x1008 /* interrupt mask register */ -#define TALITOS_IMR_INIT 0x100ff /* enable channel IRQs */ -#define TALITOS_IMR_DONE 0x00055 /* done IRQs */ +/* enable channel IRQs */ +#define TALITOS1_IMR_INIT ISR1_FORMAT(0xf) +#define TALITOS1_IMR_DONE ISR1_FORMAT(0x5) /* done IRQs */ +/* enable channel IRQs */ +#define TALITOS2_IMR_INIT (ISR2_FORMAT(0xf) | 0x10000) +#define TALITOS2_IMR_DONE ISR1_FORMAT(0x5) /* done IRQs */ #define TALITOS_IMR_LO 0x100C -#define TALITOS_IMR_LO_INIT 0x20000 /* allow RNGU error IRQs */ +#define TALITOS1_IMR_LO_INIT 0x2000000 /* allow RNGU error IRQs */ +#define TALITOS2_IMR_LO_INIT 0x20000 /* allow RNGU error IRQs */ #define TALITOS_ISR 0x1010 /* interrupt status register */ -#define TALITOS_ISR_4CHERR 0xaa /* 4 channel errors mask */ -#define TALITOS_ISR_4CHDONE 0x55 /* 4 channel done mask */ -#define TALITOS_ISR_CH_0_2_ERR 0x22 /* channels 0, 2 errors mask */ -#define TALITOS_ISR_CH_0_2_DONE 0x11 /* channels 0, 2 done mask */ -#define TALITOS_ISR_CH_1_3_ERR 0x88 /* channels 1, 3 errors mask */ -#define TALITOS_ISR_CH_1_3_DONE 0x44 /* channels 1, 3 done mask */ +#define TALITOS1_ISR_4CHERR ISR1_FORMAT(0xa) /* 4 ch errors mask */ +#define TALITOS1_ISR_4CHDONE ISR1_FORMAT(0x5) /* 4 ch done mask */ +#define TALITOS1_ISR_TEA_ERR 0x00000040 +#define TALITOS2_ISR_4CHERR ISR2_FORMAT(0xa) /* 4 ch errors mask */ +#define TALITOS2_ISR_4CHDONE ISR2_FORMAT(0x5) /* 4 ch done mask */ +#define TALITOS2_ISR_CH_0_2_ERR ISR2_FORMAT(0x2) /* ch 0, 2 err mask */ +#define TALITOS2_ISR_CH_0_2_DONE ISR2_FORMAT(0x1) /* ch 0, 2 done mask */ +#define TALITOS2_ISR_CH_1_3_ERR ISR2_FORMAT(0x8) /* ch 1, 3 err mask */ +#define TALITOS2_ISR_CH_1_3_DONE ISR2_FORMAT(0x4) /* ch 1, 3 done mask */ #define TALITOS_ISR_LO 0x1014 #define TALITOS_ICR 0x1018 /* interrupt clear register */ #define TALITOS_ICR_LO 0x101C /* channel register address stride */ #define TALITOS_CH_BASE_OFFSET 0x1000 /* default channel map base */ -#define TALITOS_CH_STRIDE 0x100 +#define TALITOS1_CH_STRIDE 0x1000 +#define TALITOS2_CH_STRIDE 0x100 /* channel configuration register */ #define TALITOS_CCCR 0x8 -#define TALITOS_CCCR_CONT 0x2 /* channel continue */ -#define TALITOS_CCCR_RESET 0x1 /* channel reset */ +#define TALITOS2_CCCR_CONT 0x2 /* channel continue on SEC2 */ +#define TALITOS2_CCCR_RESET 0x1 /* channel reset on SEC2 */ #define TALITOS_CCCR_LO 0xc #define TALITOS_CCCR_LO_IWSE 0x80 /* chan. ICCR writeback enab. */ #define TALITOS_CCCR_LO_EAE 0x20 /* extended address enable */ #define TALITOS_CCCR_LO_CDWE 0x10 /* chan. done writeback enab. */ #define TALITOS_CCCR_LO_NT 0x4 /* notification type */ #define TALITOS_CCCR_LO_CDIE 0x2 /* channel done IRQ enable */ +#define TALITOS1_CCCR_LO_RESET 0x1 /* channel reset on SEC1 */ /* CCPSR: channel pointer status register */ #define TALITOS_CCPSR 0x10 @@ -224,37 +278,48 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_SCATTER 0xe0 #define TALITOS_SCATTER_LO 0xe4 +/* execution unit registers base */ +#define TALITOS2_DEU 0x2000 +#define TALITOS2_AESU 0x4000 +#define TALITOS2_MDEU 0x6000 +#define TALITOS2_AFEU 0x8000 +#define TALITOS2_RNGU 0xa000 +#define TALITOS2_PKEU 0xc000 +#define TALITOS2_KEU 0xe000 +#define TALITOS2_CRCU 0xf000 + +#define TALITOS12_AESU 0x4000 +#define TALITOS12_DEU 0x5000 +#define TALITOS12_MDEU 0x6000 + +#define TALITOS10_AFEU 0x8000 +#define TALITOS10_DEU 0xa000 +#define TALITOS10_MDEU 0xc000 +#define TALITOS10_RNGU 0xe000 +#define TALITOS10_PKEU 0x10000 +#define TALITOS10_AESU 0x12000 + /* execution unit interrupt status registers */ -#define TALITOS_DEUISR 0x2030 /* DES unit */ -#define TALITOS_DEUISR_LO 0x2034 -#define TALITOS_AESUISR 0x4030 /* AES unit */ -#define TALITOS_AESUISR_LO 0x4034 -#define TALITOS_MDEUISR 0x6030 /* message digest unit */ -#define TALITOS_MDEUISR_LO 0x6034 -#define TALITOS_MDEUICR 0x6038 /* interrupt control */ -#define TALITOS_MDEUICR_LO 0x603c +#define TALITOS_EUDSR 0x10 /* data size */ +#define TALITOS_EUDSR_LO 0x14 +#define TALITOS_EURCR 0x18 /* reset control*/ +#define TALITOS_EURCR_LO 0x1c +#define TALITOS_EUSR 0x28 /* rng status */ +#define TALITOS_EUSR_LO 0x2c +#define TALITOS_EUISR 0x30 +#define TALITOS_EUISR_LO 0x34 +#define TALITOS_EUICR 0x38 /* int. control */ +#define TALITOS_EUICR_LO 0x3c +#define TALITOS_EU_FIFO 0x800 /* output FIFO */ +#define TALITOS_EU_FIFO_LO 0x804 /* output FIFO */ +/* DES unit */ +#define TALITOS1_DEUICR_KPE 0x00200000 /* Key Parity Error */ +/* message digest unit */ #define TALITOS_MDEUICR_LO_ICE 0x4000 /* integrity check IRQ enable */ -#define TALITOS_AFEUISR 0x8030 /* arc4 unit */ -#define TALITOS_AFEUISR_LO 0x8034 -#define TALITOS_RNGUISR 0xa030 /* random number unit */ -#define TALITOS_RNGUISR_LO 0xa034 -#define TALITOS_RNGUSR 0xa028 /* rng status */ -#define TALITOS_RNGUSR_LO 0xa02c +/* random number unit */ #define TALITOS_RNGUSR_LO_RD 0x1 /* reset done */ #define TALITOS_RNGUSR_LO_OFL 0xff0000/* output FIFO length */ -#define TALITOS_RNGUDSR 0xa010 /* data size */ -#define TALITOS_RNGUDSR_LO 0xa014 -#define TALITOS_RNGU_FIFO 0xa800 /* output FIFO */ -#define TALITOS_RNGU_FIFO_LO 0xa804 /* output FIFO */ -#define TALITOS_RNGURCR 0xa018 /* reset control */ -#define TALITOS_RNGURCR_LO 0xa01c #define TALITOS_RNGURCR_LO_SR 0x1 /* software reset */ -#define TALITOS_PKEUISR 0xc030 /* public key unit */ -#define TALITOS_PKEUISR_LO 0xc034 -#define TALITOS_KEUISR 0xe030 /* kasumi unit */ -#define TALITOS_KEUISR_LO 0xe034 -#define TALITOS_CRCUISR 0xf030 /* cyclic redundancy check unit*/ -#define TALITOS_CRCUISR_LO 0xf034 #define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 0x28 #define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 0x48 diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig index b35e5c4..3079644 100644 --- a/drivers/crypto/ux500/Kconfig +++ b/drivers/crypto/ux500/Kconfig @@ -7,6 +7,8 @@ config CRYPTO_DEV_UX500_CRYP tristate "UX500 crypto driver for CRYP block" depends on CRYPTO_DEV_UX500 + select CRYPTO_ALGAPI + select CRYPTO_BLKCIPHER select CRYPTO_DES help This selects the crypto driver for the UX500_CRYP hardware. It supports @@ -16,7 +18,6 @@ config CRYPTO_DEV_UX500_HASH tristate "UX500 crypto driver for HASH block" depends on CRYPTO_DEV_UX500 select CRYPTO_HASH - select CRYPTO_HMAC help This selects the hash driver for the UX500_HASH hardware. Depends on UX500/STM DMA if running in DMA mode. @@ -24,7 +25,6 @@ config CRYPTO_DEV_UX500_HASH config CRYPTO_DEV_UX500_DEBUG bool "Activate ux500 platform debug-mode for crypto and hash block" depends on CRYPTO_DEV_UX500_CRYP || CRYPTO_DEV_UX500_HASH - default n help Say Y if you want to add debug prints to ux500_hash and ux500_cryp devices. diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig index 771babf..89d8208 100644 --- a/drivers/crypto/vmx/Kconfig +++ b/drivers/crypto/vmx/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_VMX_ENCRYPT tristate "Encryption acceleration support on P8 CPU" - depends on PPC64 && CRYPTO_DEV_VMX + depends on CRYPTO_DEV_VMX default y help Support for VMX cryptographic acceleration instructions on Power8 CPU. diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index c699c6e..d28ab96 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -4,7 +4,7 @@ vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o gha ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le else -TARGET := linux-pcc64 +TARGET := linux-ppc64 endif quiet_cmd_perl = PERL $@ diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c index a9064e3..e79e567 100644 --- a/drivers/crypto/vmx/aes.c +++ b/drivers/crypto/vmx/aes.c @@ -30,116 +30,118 @@ #include "aesp8-ppc.h" struct p8_aes_ctx { - struct crypto_cipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; + struct crypto_cipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; }; static int p8_aes_init(struct crypto_tfm *tfm) { - const char *alg; - struct crypto_cipher *fallback; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!(alg = crypto_tfm_alg_name(tfm))) { - printk(KERN_ERR "Failed to get algorithm name.\n"); - return -ENOENT; - } - - fallback = crypto_alloc_cipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); - - crypto_cipher_set_flags(fallback, - crypto_cipher_get_flags((struct crypto_cipher *) tfm)); - ctx->fallback = fallback; - - return 0; + const char *alg; + struct crypto_cipher *fallback; + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_cipher_set_flags(fallback, + crypto_cipher_get_flags((struct + crypto_cipher *) + tfm)); + ctx->fallback = fallback; + + return 0; } static void p8_aes_exit(struct crypto_tfm *tfm) { - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) { - crypto_free_cipher(ctx->fallback); - ctx->fallback = NULL; - } + if (ctx->fallback) { + crypto_free_cipher(ctx->fallback); + ctx->fallback = NULL; + } } static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int keylen) { - int ret; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - pagefault_enable(); - preempt_enable(); - - ret += crypto_cipher_setkey(ctx->fallback, key, keylen); - return ret; + int ret; + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + pagefault_enable(); + preempt_enable(); + + ret += crypto_cipher_setkey(ctx->fallback, key, keylen); + return ret; } static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (in_interrupt()) { - crypto_cipher_encrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - aes_p8_encrypt(src, dst, &ctx->enc_key); - pagefault_enable(); - preempt_enable(); - } + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (in_interrupt()) { + crypto_cipher_encrypt_one(ctx->fallback, dst, src); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_encrypt(src, dst, &ctx->enc_key); + pagefault_enable(); + preempt_enable(); + } } static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (in_interrupt()) { - crypto_cipher_decrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - aes_p8_decrypt(src, dst, &ctx->dec_key); - pagefault_enable(); - preempt_enable(); - } + struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + if (in_interrupt()) { + crypto_cipher_decrypt_one(ctx->fallback, dst, src); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_decrypt(src, dst, &ctx->dec_key); + pagefault_enable(); + preempt_enable(); + } } struct crypto_alg p8_aes_alg = { - .cra_name = "aes", - .cra_driver_name = "p8_aes", - .cra_module = THIS_MODULE, - .cra_priority = 1000, - .cra_type = NULL, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, - .cra_alignmask = 0, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_aes_ctx), - .cra_init = p8_aes_init, - .cra_exit = p8_aes_exit, - .cra_cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = p8_aes_setkey, - .cia_encrypt = p8_aes_encrypt, - .cia_decrypt = p8_aes_decrypt, - }, + .cra_name = "aes", + .cra_driver_name = "p8_aes", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = NULL, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_ctx), + .cra_init = p8_aes_init, + .cra_exit = p8_aes_exit, + .cra_cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = p8_aes_setkey, + .cia_encrypt = p8_aes_encrypt, + .cia_decrypt = p8_aes_decrypt, + }, }; - diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c index 477284a..7299995 100644 --- a/drivers/crypto/vmx/aes_cbc.c +++ b/drivers/crypto/vmx/aes_cbc.c @@ -31,160 +31,168 @@ #include "aesp8-ppc.h" struct p8_aes_cbc_ctx { - struct crypto_blkcipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; }; static int p8_aes_cbc_init(struct crypto_tfm *tfm) { - const char *alg; - struct crypto_blkcipher *fallback; - struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!(alg = crypto_tfm_alg_name(tfm))) { - printk(KERN_ERR "Failed to get algorithm name.\n"); - return -ENOENT; - } - - fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); - - crypto_blkcipher_set_flags(fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); - ctx->fallback = fallback; - - return 0; + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; } static void p8_aes_cbc_exit(struct crypto_tfm *tfm) { - struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); - ctx->fallback = NULL; - } + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } } static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int keylen) { - int ret; - struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); - - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - pagefault_enable(); - preempt_enable(); - - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); - return ret; + int ret; + struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx(tfm); + + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; } static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - int ret; - struct blkcipher_walk walk; - struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( - crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; - - if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - - blkcipher_walk_init(&walk, dst, src, nbytes); - ret = blkcipher_walk_virt(desc, &walk); - while ((nbytes = walk.nbytes)) { - aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - nbytes & AES_BLOCK_MASK, &ctx->enc_key, walk.iv, 1); + int ret; + struct blkcipher_walk walk; + struct p8_aes_cbc_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, + nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt(desc, &walk); + while ((nbytes = walk.nbytes)) { + aes_p8_cbc_encrypt(walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, + &ctx->enc_key, walk.iv, 1); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); - } + } - pagefault_enable(); - preempt_enable(); - } + pagefault_enable(); + preempt_enable(); + } - return ret; + return ret; } static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - int ret; - struct blkcipher_walk walk; - struct p8_aes_cbc_ctx *ctx = crypto_tfm_ctx( - crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; - - if (in_interrupt()) { - ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - - blkcipher_walk_init(&walk, dst, src, nbytes); - ret = blkcipher_walk_virt(desc, &walk); - while ((nbytes = walk.nbytes)) { - aes_p8_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, - nbytes & AES_BLOCK_MASK, &ctx->dec_key, walk.iv, 0); + int ret; + struct blkcipher_walk walk; + struct p8_aes_cbc_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_decrypt(&fallback_desc, dst, src, + nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt(desc, &walk); + while ((nbytes = walk.nbytes)) { + aes_p8_cbc_encrypt(walk.src.virt.addr, + walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, + &ctx->dec_key, walk.iv, 0); nbytes &= AES_BLOCK_SIZE - 1; ret = blkcipher_walk_done(desc, &walk, nbytes); } - pagefault_enable(); - preempt_enable(); - } + pagefault_enable(); + preempt_enable(); + } - return ret; + return ret; } struct crypto_alg p8_aes_cbc_alg = { - .cra_name = "cbc(aes)", - .cra_driver_name = "p8_aes_cbc", - .cra_module = THIS_MODULE, - .cra_priority = 1000, - .cra_type = &crypto_blkcipher_type, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, - .cra_alignmask = 0, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), - .cra_init = p8_aes_cbc_init, - .cra_exit = p8_aes_cbc_exit, - .cra_blkcipher = { - .ivsize = 0, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = p8_aes_cbc_setkey, - .encrypt = p8_aes_cbc_encrypt, - .decrypt = p8_aes_cbc_decrypt, - }, + .cra_name = "cbc(aes)", + .cra_driver_name = "p8_aes_cbc", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), + .cra_init = p8_aes_cbc_init, + .cra_exit = p8_aes_cbc_exit, + .cra_blkcipher = { + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = p8_aes_cbc_setkey, + .encrypt = p8_aes_cbc_encrypt, + .decrypt = p8_aes_cbc_decrypt, + }, }; - diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c index 96dbee4..7adae42 100644 --- a/drivers/crypto/vmx/aes_ctr.c +++ b/drivers/crypto/vmx/aes_ctr.c @@ -30,138 +30,147 @@ #include "aesp8-ppc.h" struct p8_aes_ctr_ctx { - struct crypto_blkcipher *fallback; - struct aes_key enc_key; + struct crypto_blkcipher *fallback; + struct aes_key enc_key; }; static int p8_aes_ctr_init(struct crypto_tfm *tfm) { - const char *alg; - struct crypto_blkcipher *fallback; - struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!(alg = crypto_tfm_alg_name(tfm))) { - printk(KERN_ERR "Failed to get algorithm name.\n"); - return -ENOENT; - } - - fallback = crypto_alloc_blkcipher(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); - - crypto_blkcipher_set_flags(fallback, - crypto_blkcipher_get_flags((struct crypto_blkcipher *) tfm)); - ctx->fallback = fallback; - - return 0; + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; } static void p8_aes_ctr_exit(struct crypto_tfm *tfm) { - struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) { - crypto_free_blkcipher(ctx->fallback); - ctx->fallback = NULL; - } + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } } static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) + unsigned int keylen) { - int ret; - struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); + int ret; + struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx(tfm); - pagefault_disable(); - enable_kernel_altivec(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - pagefault_enable(); + pagefault_disable(); + enable_kernel_altivec(); + ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); + pagefault_enable(); - ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); - return ret; + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; } static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx, - struct blkcipher_walk *walk) + struct blkcipher_walk *walk) { - u8 *ctrblk = walk->iv; - u8 keystream[AES_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - pagefault_disable(); - enable_kernel_altivec(); - aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); - pagefault_enable(); - - crypto_xor(keystream, src, nbytes); - memcpy(dst, keystream, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); + u8 *ctrblk = walk->iv; + u8 keystream[AES_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); + pagefault_enable(); + + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + crypto_inc(ctrblk, AES_BLOCK_SIZE); } static int p8_aes_ctr_crypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) { - int ret; - struct blkcipher_walk walk; - struct p8_aes_ctr_ctx *ctx = crypto_tfm_ctx( - crypto_blkcipher_tfm(desc->tfm)); - struct blkcipher_desc fallback_desc = { - .tfm = ctx->fallback, - .info = desc->info, - .flags = desc->flags - }; - - if (in_interrupt()) { - ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes); - } else { - blkcipher_walk_init(&walk, dst, src, nbytes); - ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - pagefault_disable(); - enable_kernel_altivec(); - aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, walk.dst.virt.addr, - (nbytes & AES_BLOCK_MASK)/AES_BLOCK_SIZE, &ctx->enc_key, walk.iv); - pagefault_enable(); - - crypto_inc(walk.iv, AES_BLOCK_SIZE); - nbytes &= AES_BLOCK_SIZE - 1; - ret = blkcipher_walk_done(desc, &walk, nbytes); - } - if (walk.nbytes) { - p8_aes_ctr_final(ctx, &walk); - ret = blkcipher_walk_done(desc, &walk, 0); - } - } - - return ret; + int ret; + struct blkcipher_walk walk; + struct p8_aes_ctr_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = crypto_blkcipher_encrypt(&fallback_desc, dst, src, + nbytes); + } else { + blkcipher_walk_init(&walk, dst, src, nbytes); + ret = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); + while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + pagefault_disable(); + enable_kernel_altivec(); + aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, + walk.dst.virt.addr, + (nbytes & + AES_BLOCK_MASK) / + AES_BLOCK_SIZE, + &ctx->enc_key, + walk.iv); + pagefault_enable(); + + crypto_inc(walk.iv, AES_BLOCK_SIZE); + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + if (walk.nbytes) { + p8_aes_ctr_final(ctx, &walk); + ret = blkcipher_walk_done(desc, &walk, 0); + } + } + + return ret; } struct crypto_alg p8_aes_ctr_alg = { - .cra_name = "ctr(aes)", - .cra_driver_name = "p8_aes_ctr", - .cra_module = THIS_MODULE, - .cra_priority = 1000, - .cra_type = &crypto_blkcipher_type, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, - .cra_alignmask = 0, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), - .cra_init = p8_aes_ctr_init, - .cra_exit = p8_aes_ctr_exit, - .cra_blkcipher = { - .ivsize = 0, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = p8_aes_ctr_setkey, - .encrypt = p8_aes_ctr_crypt, - .decrypt = p8_aes_ctr_crypt, - }, + .cra_name = "ctr(aes)", + .cra_driver_name = "p8_aes_ctr", + .cra_module = THIS_MODULE, + .cra_priority = 1000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), + .cra_init = p8_aes_ctr_init, + .cra_exit = p8_aes_ctr_exit, + .cra_blkcipher = { + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = p8_aes_ctr_setkey, + .encrypt = p8_aes_ctr_crypt, + .decrypt = p8_aes_ctr_crypt, + }, }; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index e963945..4cd34ee 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -4,17 +4,18 @@ #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) struct aes_key { - u8 key[AES_MAX_KEYLENGTH]; - int rounds; + u8 key[AES_MAX_KEYLENGTH]; + int rounds; }; int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); + struct aes_key *key); int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); + struct aes_key *key); void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); -void aes_p8_decrypt(const u8 *in, u8 *out,const struct aes_key *key); +void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key); void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key, u8 *iv, const int enc); + const struct aes_key *key, u8 *iv, const int enc); void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, - size_t len, const struct aes_key *key, const u8 *iv); + size_t len, const struct aes_key *key, + const u8 *iv); diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c index f255ec4..b5e2900 100644 --- a/drivers/crypto/vmx/ghash.c +++ b/drivers/crypto/vmx/ghash.c @@ -39,184 +39,188 @@ void gcm_init_p8(u128 htable[16], const u64 Xi[2]); void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]); void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], - const u8 *in,size_t len); + const u8 *in, size_t len); struct p8_ghash_ctx { - u128 htable[16]; - struct crypto_shash *fallback; + u128 htable[16]; + struct crypto_shash *fallback; }; struct p8_ghash_desc_ctx { - u64 shash[2]; - u8 buffer[GHASH_DIGEST_SIZE]; - int bytes; - struct shash_desc fallback_desc; + u64 shash[2]; + u8 buffer[GHASH_DIGEST_SIZE]; + int bytes; + struct shash_desc fallback_desc; }; static int p8_ghash_init_tfm(struct crypto_tfm *tfm) { - const char *alg; - struct crypto_shash *fallback; - struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm); - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!(alg = crypto_tfm_alg_name(tfm))) { - printk(KERN_ERR "Failed to get algorithm name.\n"); - return -ENOENT; - } - - fallback = crypto_alloc_shash(alg, 0 ,CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - printk(KERN_INFO "Using '%s' as fallback implementation.\n", - crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback))); - - crypto_shash_set_flags(fallback, - crypto_shash_get_flags((struct crypto_shash *) tfm)); - ctx->fallback = fallback; - - shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx) - + crypto_shash_descsize(fallback); - - return 0; + const char *alg; + struct crypto_shash *fallback; + struct crypto_shash *shash_tfm = __crypto_shash_cast(tfm); + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = crypto_alloc_shash(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name(crypto_shash_tfm(fallback))); + + crypto_shash_set_flags(fallback, + crypto_shash_get_flags((struct crypto_shash + *) tfm)); + ctx->fallback = fallback; + + shash_tfm->descsize = sizeof(struct p8_ghash_desc_ctx) + + crypto_shash_descsize(fallback); + + return 0; } static void p8_ghash_exit_tfm(struct crypto_tfm *tfm) { - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) { - crypto_free_shash(ctx->fallback); - ctx->fallback = NULL; - } + if (ctx->fallback) { + crypto_free_shash(ctx->fallback); + ctx->fallback = NULL; + } } static int p8_ghash_init(struct shash_desc *desc) { - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->bytes = 0; - memset(dctx->shash, 0, GHASH_DIGEST_SIZE); - dctx->fallback_desc.tfm = ctx->fallback; - dctx->fallback_desc.flags = desc->flags; - return crypto_shash_init(&dctx->fallback_desc); + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->bytes = 0; + memset(dctx->shash, 0, GHASH_DIGEST_SIZE); + dctx->fallback_desc.tfm = ctx->fallback; + dctx->fallback_desc.flags = desc->flags; + return crypto_shash_init(&dctx->fallback_desc); } static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) + unsigned int keylen) { - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); - - if (keylen != GHASH_KEY_LEN) - return -EINVAL; - - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - enable_kernel_fp(); - gcm_init_p8(ctx->htable, (const u64 *) key); - pagefault_enable(); - preempt_enable(); - return crypto_shash_setkey(ctx->fallback, key, keylen); + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); + + if (keylen != GHASH_KEY_LEN) + return -EINVAL; + + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_init_p8(ctx->htable, (const u64 *) key); + pagefault_enable(); + preempt_enable(); + return crypto_shash_setkey(ctx->fallback, key, keylen); } static int p8_ghash_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) + const u8 *src, unsigned int srclen) { - unsigned int len; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (IN_INTERRUPT) { - return crypto_shash_update(&dctx->fallback_desc, src, srclen); - } else { - if (dctx->bytes) { - if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { - memcpy(dctx->buffer + dctx->bytes, src, srclen); - dctx->bytes += srclen; - return 0; - } - memcpy(dctx->buffer + dctx->bytes, src, - GHASH_DIGEST_SIZE - dctx->bytes); - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - enable_kernel_fp(); - gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, - GHASH_DIGEST_SIZE); - pagefault_enable(); - preempt_enable(); - src += GHASH_DIGEST_SIZE - dctx->bytes; - srclen -= GHASH_DIGEST_SIZE - dctx->bytes; - dctx->bytes = 0; - } - len = srclen & ~(GHASH_DIGEST_SIZE - 1); - if (len) { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - enable_kernel_fp(); - gcm_ghash_p8(dctx->shash, ctx->htable, src, len); - pagefault_enable(); - preempt_enable(); - src += len; - srclen -= len; - } - if (srclen) { - memcpy(dctx->buffer, src, srclen); - dctx->bytes = srclen; - } - return 0; - } + unsigned int len; + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + if (IN_INTERRUPT) { + return crypto_shash_update(&dctx->fallback_desc, src, + srclen); + } else { + if (dctx->bytes) { + if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { + memcpy(dctx->buffer + dctx->bytes, src, + srclen); + dctx->bytes += srclen; + return 0; + } + memcpy(dctx->buffer + dctx->bytes, src, + GHASH_DIGEST_SIZE - dctx->bytes); + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, + dctx->buffer, GHASH_DIGEST_SIZE); + pagefault_enable(); + preempt_enable(); + src += GHASH_DIGEST_SIZE - dctx->bytes; + srclen -= GHASH_DIGEST_SIZE - dctx->bytes; + dctx->bytes = 0; + } + len = srclen & ~(GHASH_DIGEST_SIZE - 1); + if (len) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, src, len); + pagefault_enable(); + preempt_enable(); + src += len; + srclen -= len; + } + if (srclen) { + memcpy(dctx->buffer, src, srclen); + dctx->bytes = srclen; + } + return 0; + } } static int p8_ghash_final(struct shash_desc *desc, u8 *out) { - int i; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (IN_INTERRUPT) { - return crypto_shash_final(&dctx->fallback_desc, out); - } else { - if (dctx->bytes) { - for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) - dctx->buffer[i] = 0; - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - enable_kernel_fp(); - gcm_ghash_p8(dctx->shash, ctx->htable, dctx->buffer, - GHASH_DIGEST_SIZE); - pagefault_enable(); - preempt_enable(); - dctx->bytes = 0; - } - memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); - return 0; - } + int i; + struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); + struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); + + if (IN_INTERRUPT) { + return crypto_shash_final(&dctx->fallback_desc, out); + } else { + if (dctx->bytes) { + for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) + dctx->buffer[i] = 0; + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + enable_kernel_fp(); + gcm_ghash_p8(dctx->shash, ctx->htable, + dctx->buffer, GHASH_DIGEST_SIZE); + pagefault_enable(); + preempt_enable(); + dctx->bytes = 0; + } + memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); + return 0; + } } struct shash_alg p8_ghash_alg = { - .digestsize = GHASH_DIGEST_SIZE, - .init = p8_ghash_init, - .update = p8_ghash_update, - .final = p8_ghash_final, - .setkey = p8_ghash_setkey, - .descsize = sizeof(struct p8_ghash_desc_ctx), - .base = { - .cra_name = "ghash", - .cra_driver_name = "p8_ghash", - .cra_priority = 1000, - .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_ghash_ctx), - .cra_module = THIS_MODULE, - .cra_init = p8_ghash_init_tfm, - .cra_exit = p8_ghash_exit_tfm, - }, + .digestsize = GHASH_DIGEST_SIZE, + .init = p8_ghash_init, + .update = p8_ghash_update, + .final = p8_ghash_final, + .setkey = p8_ghash_setkey, + .descsize = sizeof(struct p8_ghash_desc_ctx), + .base = { + .cra_name = "ghash", + .cra_driver_name = "p8_ghash", + .cra_priority = 1000, + .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = GHASH_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_ghash_ctx), + .cra_module = THIS_MODULE, + .cra_init = p8_ghash_init_tfm, + .cra_exit = p8_ghash_exit_tfm, + }, }; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index 44d8d5c..e163d57 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -32,57 +32,57 @@ extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; static struct crypto_alg *algs[] = { - &p8_aes_alg, - &p8_aes_cbc_alg, - &p8_aes_ctr_alg, - NULL, + &p8_aes_alg, + &p8_aes_cbc_alg, + &p8_aes_ctr_alg, + NULL, }; int __init p8_init(void) { - int ret = 0; - struct crypto_alg **alg_it; + int ret = 0; + struct crypto_alg **alg_it; - if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) - return -ENODEV; + if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + return -ENODEV; - for (alg_it = algs; *alg_it; alg_it++) { - ret = crypto_register_alg(*alg_it); - printk(KERN_INFO "crypto_register_alg '%s' = %d\n", - (*alg_it)->cra_name, ret); - if (ret) { - for (alg_it--; alg_it >= algs; alg_it--) - crypto_unregister_alg(*alg_it); - break; - } - } - if (ret) - return ret; + for (alg_it = algs; *alg_it; alg_it++) { + ret = crypto_register_alg(*alg_it); + printk(KERN_INFO "crypto_register_alg '%s' = %d\n", + (*alg_it)->cra_name, ret); + if (ret) { + for (alg_it--; alg_it >= algs; alg_it--) + crypto_unregister_alg(*alg_it); + break; + } + } + if (ret) + return ret; - ret = crypto_register_shash(&p8_ghash_alg); - if (ret) { - for (alg_it = algs; *alg_it; alg_it++) - crypto_unregister_alg(*alg_it); - } - return ret; + ret = crypto_register_shash(&p8_ghash_alg); + if (ret) { + for (alg_it = algs; *alg_it; alg_it++) + crypto_unregister_alg(*alg_it); + } + return ret; } void __exit p8_exit(void) { - struct crypto_alg **alg_it; + struct crypto_alg **alg_it; - for (alg_it = algs; *alg_it; alg_it++) { - printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name); - crypto_unregister_alg(*alg_it); - } - crypto_unregister_shash(&p8_ghash_alg); + for (alg_it = algs; *alg_it; alg_it++) { + printk(KERN_INFO "Removing '%s'\n", (*alg_it)->cra_name); + crypto_unregister_alg(*alg_it); + } + crypto_unregister_shash(&p8_ghash_alg); } module_init(p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); -MODULE_DESCRIPTION("IBM VMX cryptogaphic acceleration instructions support on Power 8"); +MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " + "support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); - |