diff options
116 files changed, 12079 insertions, 4348 deletions
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl index efc8d90..5b05510 100644 --- a/Documentation/DocBook/crypto-API.tmpl +++ b/Documentation/DocBook/crypto-API.tmpl @@ -1671,7 +1671,7 @@ read(opfd, out, outlen); <chapter id="API"><title>Programming Interface</title> <sect1><title>Block Cipher Context Data Structures</title> !Pinclude/linux/crypto.h Block Cipher Context Data Structures -!Finclude/linux/crypto.h aead_request +!Finclude/crypto/aead.h aead_request </sect1> <sect1><title>Block Cipher Algorithm Definitions</title> !Pinclude/linux/crypto.h Block Cipher Algorithm Definitions @@ -1680,7 +1680,7 @@ read(opfd, out, outlen); !Finclude/linux/crypto.h aead_alg !Finclude/linux/crypto.h blkcipher_alg !Finclude/linux/crypto.h cipher_alg -!Finclude/linux/crypto.h rng_alg +!Finclude/crypto/rng.h rng_alg </sect1> <sect1><title>Asynchronous Block Cipher API</title> !Pinclude/linux/crypto.h Asynchronous Block Cipher API @@ -1704,26 +1704,26 @@ read(opfd, out, outlen); !Finclude/linux/crypto.h ablkcipher_request_set_crypt </sect1> <sect1><title>Authenticated Encryption With Associated Data (AEAD) Cipher API</title> -!Pinclude/linux/crypto.h Authenticated Encryption With Associated Data (AEAD) Cipher API -!Finclude/linux/crypto.h crypto_alloc_aead -!Finclude/linux/crypto.h crypto_free_aead -!Finclude/linux/crypto.h crypto_aead_ivsize -!Finclude/linux/crypto.h crypto_aead_authsize -!Finclude/linux/crypto.h crypto_aead_blocksize -!Finclude/linux/crypto.h crypto_aead_setkey -!Finclude/linux/crypto.h crypto_aead_setauthsize -!Finclude/linux/crypto.h crypto_aead_encrypt -!Finclude/linux/crypto.h crypto_aead_decrypt +!Pinclude/crypto/aead.h Authenticated Encryption With Associated Data (AEAD) Cipher API +!Finclude/crypto/aead.h crypto_alloc_aead +!Finclude/crypto/aead.h crypto_free_aead +!Finclude/crypto/aead.h crypto_aead_ivsize +!Finclude/crypto/aead.h crypto_aead_authsize +!Finclude/crypto/aead.h crypto_aead_blocksize 
+!Finclude/crypto/aead.h crypto_aead_setkey +!Finclude/crypto/aead.h crypto_aead_setauthsize +!Finclude/crypto/aead.h crypto_aead_encrypt +!Finclude/crypto/aead.h crypto_aead_decrypt </sect1> <sect1><title>Asynchronous AEAD Request Handle</title> -!Pinclude/linux/crypto.h Asynchronous AEAD Request Handle -!Finclude/linux/crypto.h crypto_aead_reqsize -!Finclude/linux/crypto.h aead_request_set_tfm -!Finclude/linux/crypto.h aead_request_alloc -!Finclude/linux/crypto.h aead_request_free -!Finclude/linux/crypto.h aead_request_set_callback -!Finclude/linux/crypto.h aead_request_set_crypt -!Finclude/linux/crypto.h aead_request_set_assoc +!Pinclude/crypto/aead.h Asynchronous AEAD Request Handle +!Finclude/crypto/aead.h crypto_aead_reqsize +!Finclude/crypto/aead.h aead_request_set_tfm +!Finclude/crypto/aead.h aead_request_alloc +!Finclude/crypto/aead.h aead_request_free +!Finclude/crypto/aead.h aead_request_set_callback +!Finclude/crypto/aead.h aead_request_set_crypt +!Finclude/crypto/aead.h aead_request_set_assoc </sect1> <sect1><title>Synchronous Block Cipher API</title> !Pinclude/linux/crypto.h Synchronous Block Cipher API diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt index 38988ef..f0d926b 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt @@ -1,9 +1,11 @@ -Freescale SoC SEC Security Engines versions 2.x-3.x +Freescale SoC SEC Security Engines versions 1.x-2.x-3.x Required properties: - compatible : Should contain entries for this and backward compatible - SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" + SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3) + e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1) + warning: SEC1 and SEC2 are mutually exclusive - reg : Offset and length of the register set for the device - interrupts : the SEC's interrupt number - fsl,num-channels : An integer representing 
the number of channels diff --git a/MAINTAINERS b/MAINTAINERS index 56a432d..912c9d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4861,8 +4861,11 @@ F: drivers/crypto/nx/ IBM Power 842 compression accelerator M: Dan Streetman <ddstreet@us.ibm.com> S: Supported -F: drivers/crypto/nx/nx-842.c +F: drivers/crypto/nx/nx-842* F: include/linux/nx842.h +F: include/linux/sw842.h +F: crypto/842.c +F: lib/842/ IBM Power Linux RAID adapter M: Brian King <brking@us.ibm.com> diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 8da2207..27ed1b1 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM SHA-256 secure hash standard (DFIPS 180-2) implemented using optimized ARM assembler and NEON, when available. -config CRYPTO_SHA512_ARM_NEON - tristate "SHA384 and SHA512 digest algorithm (ARM NEON)" - depends on KERNEL_MODE_NEON - select CRYPTO_SHA512 +config CRYPTO_SHA512_ARM + tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)" select CRYPTO_HASH + depends on !CPU_V7M help SHA-512 secure hash standard (DFIPS 180-2) implemented - using ARM NEON instructions, when available. - - This version of SHA implements a 512 bit hash with 256 bits of - security against collision attacks. - - This code also includes SHA-384, a 384 bit hash with 192 bits - of security against collision attacks. + using optimized ARM assembler and NEON, when available. 
config CRYPTO_AES_ARM tristate "AES cipher algorithms (ARM-asm)" diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 6ea8282..fc51507 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o -obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o @@ -30,7 +30,8 @@ sha1-arm-y := sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) -sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o +sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o +sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o @@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl $(call cmd,perl) -.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S +$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl + $(call cmd,perl) + +.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 8cfa468..987aa63 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -101,15 +101,14 @@ \dround q10, q11 blo 0f @ AES-128: 10 rounds vld1.8 {q10-q11}, [ip]! 
- beq 1f @ AES-192: 12 rounds \dround q12, q13 + beq 1f @ AES-192: 12 rounds vld1.8 {q12-q13}, [ip] \dround q10, q11 0: \fround q12, q13, q14 bx lr -1: \dround q12, q13 - \fround q10, q11, q14 +1: \fround q10, q11, q14 bx lr .endm @@ -122,8 +121,8 @@ * q2 : third in/output block (_3x version only) * q8 : first round key * q9 : secound round key - * ip : address of 3rd round key * q14 : final round key + * r2 : address of round key array * r3 : number of rounds */ .align 6 diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl new file mode 100644 index 0000000..a2b11a8 --- /dev/null +++ b/arch/arm/crypto/sha512-armv4.pl @@ -0,0 +1,649 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. +# ==================================================================== + +# SHA512 block procedure for ARMv4. September 2007. + +# This code is ~4.5 (four and a half) times faster than code generated +# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +# Xscale PXA250 core]. +# +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 6% improvement on +# Cortex A8 core and ~40 cycles per processed byte. + +# February 2011. +# +# Profiler-assisted and platform-specific optimization resulted in 7% +# improvement on Coxtex A8 core and ~38 cycles per byte. + +# March 2011. +# +# Add NEON implementation. On Cortex A8 it was measured to process +# one byte in 23.3 cycles or ~60% faster than integer-only code. + +# August 2012. +# +# Improve NEON performance by 12% on Snapdragon S4. In absolute +# terms it's 22.6 cycles per byte, which is disappointing result. 
+# Technical writers asserted that 3-way S4 pipeline can sustain +# multiple NEON instructions per cycle, but dual NEON issue could +# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +# for further details. On side note Cortex-A15 processes one byte in +# 16 cycles. + +# Byte order [in]dependence. ========================================= +# +# Originally caller was expected to maintain specific *dword* order in +# h[0-7], namely with most significant dword at *lower* address, which +# was reflected in below two parameters as 0 and 4. Now caller is +# expected to maintain native byte order for whole 64-bit values. +$hi="HI"; +$lo="LO"; +# ==================================================================== + +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} +open STDOUT,">$output"; + +$ctx="r0"; # parameter block +$inp="r1"; +$len="r2"; + +$Tlo="r3"; +$Thi="r4"; +$Alo="r5"; +$Ahi="r6"; +$Elo="r7"; +$Ehi="r8"; +$t0="r9"; +$t1="r10"; +$t2="r11"; +$t3="r12"; +############ r13 is stack pointer +$Ktbl="r14"; +############ r15 is program counter + +$Aoff=8*0; +$Boff=8*1; +$Coff=8*2; +$Doff=8*3; +$Eoff=8*4; +$Foff=8*5; +$Goff=8*6; +$Hoff=8*7; +$Xoff=8*8; + +sub BODY_00_15() { +my $magic = shift; +$code.=<<___; + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov $t0,$Elo,lsr#14 + str $Tlo,[sp,#$Xoff+0] + mov $t1,$Ehi,lsr#14 + str $Thi,[sp,#$Xoff+4] + eor $t0,$t0,$Ehi,lsl#18 + ldr $t2,[sp,#$Hoff+0] @ h.lo + eor $t1,$t1,$Elo,lsl#18 + ldr $t3,[sp,#$Hoff+4] @ h.hi + eor $t0,$t0,$Elo,lsr#18 + eor $t1,$t1,$Ehi,lsr#18 + eor $t0,$t0,$Ehi,lsl#14 + eor $t1,$t1,$Elo,lsl#14 + eor $t0,$t0,$Ehi,lsr#9 + eor $t1,$t1,$Elo,lsr#9 + eor $t0,$t0,$Elo,lsl#23 + eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) + adds $Tlo,$Tlo,$t0 + ldr $t0,[sp,#$Foff+0] @ f.lo + adc $Thi,$Thi,$t1 @ T += Sigma1(e) + ldr $t1,[sp,#$Foff+4] @ f.hi + adds $Tlo,$Tlo,$t2 + ldr 
$t2,[sp,#$Goff+0] @ g.lo + adc $Thi,$Thi,$t3 @ T += h + ldr $t3,[sp,#$Goff+4] @ g.hi + + eor $t0,$t0,$t2 + str $Elo,[sp,#$Eoff+0] + eor $t1,$t1,$t3 + str $Ehi,[sp,#$Eoff+4] + and $t0,$t0,$Elo + str $Alo,[sp,#$Aoff+0] + and $t1,$t1,$Ehi + str $Ahi,[sp,#$Aoff+4] + eor $t0,$t0,$t2 + ldr $t2,[$Ktbl,#$lo] @ K[i].lo + eor $t1,$t1,$t3 @ Ch(e,f,g) + ldr $t3,[$Ktbl,#$hi] @ K[i].hi + + adds $Tlo,$Tlo,$t0 + ldr $Elo,[sp,#$Doff+0] @ d.lo + adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) + ldr $Ehi,[sp,#$Doff+4] @ d.hi + adds $Tlo,$Tlo,$t2 + and $t0,$t2,#0xff + adc $Thi,$Thi,$t3 @ T += K[i] + adds $Elo,$Elo,$Tlo + ldr $t2,[sp,#$Boff+0] @ b.lo + adc $Ehi,$Ehi,$Thi @ d += T + teq $t0,#$magic + + ldr $t3,[sp,#$Coff+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq $Ktbl,$Ktbl,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov $t0,$Alo,lsr#28 + mov $t1,$Ahi,lsr#28 + eor $t0,$t0,$Ahi,lsl#4 + eor $t1,$t1,$Alo,lsl#4 + eor $t0,$t0,$Ahi,lsr#2 + eor $t1,$t1,$Alo,lsr#2 + eor $t0,$t0,$Alo,lsl#30 + eor $t1,$t1,$Ahi,lsl#30 + eor $t0,$t0,$Ahi,lsr#7 + eor $t1,$t1,$Alo,lsr#7 + eor $t0,$t0,$Alo,lsl#25 + eor $t1,$t1,$Ahi,lsl#25 @ Sigma0(a) + adds $Tlo,$Tlo,$t0 + and $t0,$Alo,$t2 + adc $Thi,$Thi,$t1 @ T += Sigma0(a) + + ldr $t1,[sp,#$Boff+4] @ b.hi + orr $Alo,$Alo,$t2 + ldr $t2,[sp,#$Coff+4] @ c.hi + and $Alo,$Alo,$t3 + and $t3,$Ahi,$t1 + orr $Ahi,$Ahi,$t1 + orr $Alo,$Alo,$t0 @ Maj(a,b,c).lo + and $Ahi,$Ahi,$t2 + adds $Alo,$Alo,$Tlo + orr $Ahi,$Ahi,$t3 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc $Ahi,$Ahi,$Thi @ h += T + tst $Ktbl,#1 + add $Ktbl,$Ktbl,#8 +___ +} +$code=<<___; +#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +#ifdef __ARMEL__ +# define 
LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) 
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha512_block_data_order +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + add $len,$inp,$len,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4-r12,lr} + sub $Ktbl,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr $Elo,[$ctx,#$Eoff+$lo] + ldr $Ehi,[$ctx,#$Eoff+$hi] + ldr $t0, [$ctx,#$Goff+$lo] + ldr $t1, [$ctx,#$Goff+$hi] + ldr $t2, [$ctx,#$Hoff+$lo] + ldr $t3, [$ctx,#$Hoff+$hi] +.Loop: + str $t0, [sp,#$Goff+0] + str $t1, [sp,#$Goff+4] + str $t2, [sp,#$Hoff+0] + str $t3, [sp,#$Hoff+4] + ldr $Alo,[$ctx,#$Aoff+$lo] + ldr $Ahi,[$ctx,#$Aoff+$hi] + ldr $Tlo,[$ctx,#$Boff+$lo] + ldr $Thi,[$ctx,#$Boff+$hi] + ldr $t0, [$ctx,#$Coff+$lo] + ldr $t1, [$ctx,#$Coff+$hi] + ldr $t2, [$ctx,#$Doff+$lo] + ldr $t3, [$ctx,#$Doff+$hi] + str $Tlo,[sp,#$Boff+0] + str $Thi,[sp,#$Boff+4] + str $t0, [sp,#$Coff+0] + str $t1, [sp,#$Coff+4] + str $t2, [sp,#$Doff+0] + str $t3, [sp,#$Doff+4] + ldr $Tlo,[$ctx,#$Foff+$lo] + ldr $Thi,[$ctx,#$Foff+$hi] + str $Tlo,[sp,#$Foff+0] + str $Thi,[sp,#$Foff+4] + +.L00_15: +#if 
__ARM_ARCH__<7 + ldrb $Tlo,[$inp,#7] + ldrb $t0, [$inp,#6] + ldrb $t1, [$inp,#5] + ldrb $t2, [$inp,#4] + ldrb $Thi,[$inp,#3] + ldrb $t3, [$inp,#2] + orr $Tlo,$Tlo,$t0,lsl#8 + ldrb $t0, [$inp,#1] + orr $Tlo,$Tlo,$t1,lsl#16 + ldrb $t1, [$inp],#8 + orr $Tlo,$Tlo,$t2,lsl#24 + orr $Thi,$Thi,$t3,lsl#8 + orr $Thi,$Thi,$t0,lsl#16 + orr $Thi,$Thi,$t1,lsl#24 +#else + ldr $Tlo,[$inp,#4] + ldr $Thi,[$inp],#8 +#ifdef __ARMEL__ + rev $Tlo,$Tlo + rev $Thi,$Thi +#endif +#endif +___ + &BODY_00_15(0x94); +$code.=<<___; + tst $Ktbl,#1 + beq .L00_15 + ldr $t0,[sp,#`$Xoff+8*(16-1)`+0] + ldr $t1,[sp,#`$Xoff+8*(16-1)`+4] + bic $Ktbl,$Ktbl,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov $Tlo,$t0,lsr#1 + ldr $t2,[sp,#`$Xoff+8*(16-14)`+0] + mov $Thi,$t1,lsr#1 + ldr $t3,[sp,#`$Xoff+8*(16-14)`+4] + eor $Tlo,$Tlo,$t1,lsl#31 + eor $Thi,$Thi,$t0,lsl#31 + eor $Tlo,$Tlo,$t0,lsr#8 + eor $Thi,$Thi,$t1,lsr#8 + eor $Tlo,$Tlo,$t1,lsl#24 + eor $Thi,$Thi,$t0,lsl#24 + eor $Tlo,$Tlo,$t0,lsr#7 + eor $Thi,$Thi,$t1,lsr#7 + eor $Tlo,$Tlo,$t1,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov $t0,$t2,lsr#19 + mov $t1,$t3,lsr#19 + eor $t0,$t0,$t3,lsl#13 + eor $t1,$t1,$t2,lsl#13 + eor $t0,$t0,$t3,lsr#29 + eor $t1,$t1,$t2,lsr#29 + eor $t0,$t0,$t2,lsl#3 + eor $t1,$t1,$t3,lsl#3 + eor $t0,$t0,$t2,lsr#6 + eor $t1,$t1,$t3,lsr#6 + ldr $t2,[sp,#`$Xoff+8*(16-9)`+0] + eor $t0,$t0,$t3,lsl#26 + + ldr $t3,[sp,#`$Xoff+8*(16-9)`+4] + adds $Tlo,$Tlo,$t0 + ldr $t0,[sp,#`$Xoff+8*16`+0] + adc $Thi,$Thi,$t1 + + ldr $t1,[sp,#`$Xoff+8*16`+4] + adds $Tlo,$Tlo,$t2 + adc $Thi,$Thi,$t3 + adds $Tlo,$Tlo,$t0 + adc $Thi,$Thi,$t1 +___ + &BODY_00_15(0x17); +$code.=<<___; +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0] + ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4] 
+ beq .L16_79 + bic $Ktbl,$Ktbl,#1 + + ldr $Tlo,[sp,#$Boff+0] + ldr $Thi,[sp,#$Boff+4] + ldr $t0, [$ctx,#$Aoff+$lo] + ldr $t1, [$ctx,#$Aoff+$hi] + ldr $t2, [$ctx,#$Boff+$lo] + ldr $t3, [$ctx,#$Boff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Aoff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Aoff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Boff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Boff+$hi] + + ldr $Alo,[sp,#$Coff+0] + ldr $Ahi,[sp,#$Coff+4] + ldr $Tlo,[sp,#$Doff+0] + ldr $Thi,[sp,#$Doff+4] + ldr $t0, [$ctx,#$Coff+$lo] + ldr $t1, [$ctx,#$Coff+$hi] + ldr $t2, [$ctx,#$Doff+$lo] + ldr $t3, [$ctx,#$Doff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Coff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Coff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Doff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Doff+$hi] + + ldr $Tlo,[sp,#$Foff+0] + ldr $Thi,[sp,#$Foff+4] + ldr $t0, [$ctx,#$Eoff+$lo] + ldr $t1, [$ctx,#$Eoff+$hi] + ldr $t2, [$ctx,#$Foff+$lo] + ldr $t3, [$ctx,#$Foff+$hi] + adds $Elo,$Elo,$t0 + str $Elo,[$ctx,#$Eoff+$lo] + adc $Ehi,$Ehi,$t1 + str $Ehi,[$ctx,#$Eoff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Foff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Foff+$hi] + + ldr $Alo,[sp,#$Goff+0] + ldr $Ahi,[sp,#$Goff+4] + ldr $Tlo,[sp,#$Hoff+0] + ldr $Thi,[sp,#$Hoff+4] + ldr $t0, [$ctx,#$Goff+$lo] + ldr $t1, [$ctx,#$Goff+$hi] + ldr $t2, [$ctx,#$Hoff+$lo] + ldr $t3, [$ctx,#$Hoff+$hi] + adds $t0,$Alo,$t0 + str $t0, [$ctx,#$Goff+$lo] + adc $t1,$Ahi,$t1 + str $t1, [$ctx,#$Goff+$hi] + adds $t2,$Tlo,$t2 + str $t2, [$ctx,#$Hoff+$lo] + adc $t3,$Thi,$t3 + str $t3, [$ctx,#$Hoff+$hi] + + add sp,sp,#640 + sub $Ktbl,$Ktbl,#640 + + teq $inp,$len + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + bx lr @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order,.-sha512_block_data_order +___ + +{ +my @Sigma0=(28,34,39); +my @Sigma1=(14,18,41); +my 
@sigma0=(1, 8, 7); +my @sigma1=(19,61,6); + +my $Ktbl="r3"; +my $cnt="r12"; # volatile register known as ip, intra-procedure-call scratch + +my @X=map("d$_",(0..15)); +my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23)); + +sub NEON_00_15() { +my $i=shift; +my ($a,$b,$c,$d,$e,$f,$g,$h)=@_; +my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31)); # temps + +$code.=<<___ if ($i<16 || $i&1); + vshr.u64 $t0,$e,#@Sigma1[0] @ $i +#if $i<16 + vld1.64 {@X[$i%16]},[$inp]! @ handles unaligned +#endif + vshr.u64 $t1,$e,#@Sigma1[1] +#if $i>0 + vadd.i64 $a,$Maj @ h+=Maj from the past +#endif + vshr.u64 $t2,$e,#@Sigma1[2] +___ +$code.=<<___; + vld1.64 {$K},[$Ktbl,:64]! @ K[i++] + vsli.64 $t0,$e,#`64-@Sigma1[0]` + vsli.64 $t1,$e,#`64-@Sigma1[1]` + vmov $Ch,$e + vsli.64 $t2,$e,#`64-@Sigma1[2]` +#if $i<16 && defined(__ARMEL__) + vrev64.8 @X[$i],@X[$i] +#endif + veor $t1,$t0 + vbsl $Ch,$f,$g @ Ch(e,f,g) + vshr.u64 $t0,$a,#@Sigma0[0] + veor $t2,$t1 @ Sigma1(e) + vadd.i64 $T1,$Ch,$h + vshr.u64 $t1,$a,#@Sigma0[1] + vsli.64 $t0,$a,#`64-@Sigma0[0]` + vadd.i64 $T1,$t2 + vshr.u64 $t2,$a,#@Sigma0[2] + vadd.i64 $K,@X[$i%16] + vsli.64 $t1,$a,#`64-@Sigma0[1]` + veor $Maj,$a,$b + vsli.64 $t2,$a,#`64-@Sigma0[2]` + veor $h,$t0,$t1 + vadd.i64 $T1,$K + vbsl $Maj,$c,$b @ Maj(a,b,c) + veor $h,$t2 @ Sigma0(a) + vadd.i64 $d,$T1 + vadd.i64 $Maj,$T1 + @ vadd.i64 $h,$Maj +___ +} + +sub NEON_16_79() { +my $i=shift; + +if ($i&1) { &NEON_00_15($i,@_); return; } + +# 2x-vectorized, therefore runs every 2nd round +my @X=map("q$_",(0..7)); # view @X as 128-bit vector +my ($t0,$t1,$s0,$s1) = map("q$_",(12..15)); # temps +my ($d0,$d1,$d2) = map("d$_",(24..26)); # temps from NEON_00_15 +my $e=@_[4]; # $e from NEON_00_15 +$i /= 2; +$code.=<<___; + vshr.u64 $t0,@X[($i+7)%8],#@sigma1[0] + vshr.u64 $t1,@X[($i+7)%8],#@sigma1[1] + vadd.i64 @_[0],d30 @ h+=Maj from the past + vshr.u64 $s1,@X[($i+7)%8],#@sigma1[2] + vsli.64 $t0,@X[($i+7)%8],#`64-@sigma1[0]` + vext.8 $s0,@X[$i%8],@X[($i+1)%8],#8 @ X[i+1] + vsli.64 
$t1,@X[($i+7)%8],#`64-@sigma1[1]` + veor $s1,$t0 + vshr.u64 $t0,$s0,#@sigma0[0] + veor $s1,$t1 @ sigma1(X[i+14]) + vshr.u64 $t1,$s0,#@sigma0[1] + vadd.i64 @X[$i%8],$s1 + vshr.u64 $s1,$s0,#@sigma0[2] + vsli.64 $t0,$s0,#`64-@sigma0[0]` + vsli.64 $t1,$s0,#`64-@sigma0[1]` + vext.8 $s0,@X[($i+4)%8],@X[($i+5)%8],#8 @ X[i+9] + veor $s1,$t0 + vshr.u64 $d0,$e,#@Sigma1[0] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s0 + vshr.u64 $d1,$e,#@Sigma1[1] @ from NEON_00_15 + veor $s1,$t1 @ sigma0(X[i+1]) + vshr.u64 $d2,$e,#@Sigma1[2] @ from NEON_00_15 + vadd.i64 @X[$i%8],$s1 +___ + &NEON_00_15(2*$i,@_); +} + +$code.=<<___; +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add $len,$inp,$len,lsl#7 @ len to point at the end of inp + VFP_ABI_PUSH + adrl $Ktbl,K512 + vldmia $ctx,{$A-$H} @ load context +.Loop_neon: +___ +for($i=0;$i<16;$i++) { &NEON_00_15($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + mov $cnt,#4 +.L16_79_neon: + subs $cnt,#1 +___ +for(;$i<32;$i++) { &NEON_16_79($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + bne .L16_79_neon + + vadd.i64 $A,d30 @ h+=Maj from the past + vldmia $ctx,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia $ctx,{$A-$H} @ save context + teq $inp,$len + sub $Ktbl,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + ret @ bx lr +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +___ +} +$code.=<<___; +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 +$code =~ s/\bret\b/bx lr/gm; + +open SELF,$0; 
+while(<SELF>) { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + +print $code; +close STDOUT; # enforce flush diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S deleted file mode 100644 index fe99472..0000000 --- a/arch/arm/crypto/sha512-armv7-neon.S +++ /dev/null @@ -1,455 +0,0 @@ -/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform - * - * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - -#include <linux/linkage.h> - - -.syntax unified -.code 32 -.fpu neon - -.text - -/* structure of SHA512_CONTEXT */ -#define hd_a 0 -#define hd_b ((hd_a) + 8) -#define hd_c ((hd_b) + 8) -#define hd_d ((hd_c) + 8) -#define hd_e ((hd_d) + 8) -#define hd_f ((hd_e) + 8) -#define hd_g ((hd_f) + 8) - -/* register macros */ -#define RK %r2 - -#define RA d0 -#define RB d1 -#define RC d2 -#define RD d3 -#define RE d4 -#define RF d5 -#define RG d6 -#define RH d7 - -#define RT0 d8 -#define RT1 d9 -#define RT2 d10 -#define RT3 d11 -#define RT4 d12 -#define RT5 d13 -#define RT6 d14 -#define RT7 d15 - -#define RT01q q4 -#define RT23q q5 -#define RT45q q6 -#define RT67q q7 - -#define RW0 d16 -#define RW1 d17 -#define RW2 d18 -#define RW3 d19 -#define RW4 d20 -#define RW5 d21 -#define RW6 d22 -#define RW7 d23 -#define RW8 d24 -#define RW9 d25 -#define RW10 d26 -#define RW11 d27 -#define RW12 d28 -#define RW13 d29 -#define RW14 d30 -#define RW15 d31 - -#define RW01q q8 -#define RW23q q9 -#define RW45q q10 -#define RW67q q11 -#define RW89q q12 -#define RW1011q q13 -#define RW1213q q14 -#define RW1415q q15 - -/*********************************************************************** - * ARM assembly implementation of sha512 transform 
- ***********************************************************************/ -#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \ - rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ - /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ - vshr.u64 RT2, re, #14; \ - vshl.u64 RT3, re, #64 - 14; \ - interleave_op(arg1); \ - vshr.u64 RT4, re, #18; \ - vshl.u64 RT5, re, #64 - 18; \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, re, #41; \ - vshl.u64 RT5, re, #64 - 41; \ - vadd.u64 RT0, RT0, rw0; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, re; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, rf, rg; \ - \ - vadd.u64 RT1, RT1, rh; \ - vshr.u64 RT2, ra, #28; \ - vshl.u64 RT3, ra, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, ra, #34; \ - vshl.u64 RT5, ra, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* h = Sum0 (a) + Maj (a, b, c); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, ra, #39; \ - vshl.u64 RT5, ra, #64 - 39; \ - veor.64 RT0, ra, rb; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rc, rb; \ - vadd.u64 rd, rd, RT1; /* d+=t1; */ \ - veor.64 rh, RT2, RT3; \ - \ - /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ - vshr.u64 RT2, rd, #14; \ - vshl.u64 RT3, rd, #64 - 14; \ - vadd.u64 rh, rh, RT0; \ - vshr.u64 RT4, rd, #18; \ - vshl.u64 RT5, rd, #64 - 18; \ - vadd.u64 rh, rh, RT1; /* h+=t1; */ \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rd, #41; \ - vshl.u64 RT5, rd, #64 - 41; \ - vadd.u64 RT0, RT0, rw1; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, rd; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, re, rf; \ - \ - vadd.u64 RT1, RT1, rg; \ - vshr.u64 RT2, rh, #28; \ - vshl.u64 RT3, rh, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, rh, #34; \ - vshl.u64 RT5, rh, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* g = Sum0 (h) + Maj (h, a, b); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rh, #39; \ - vshl.u64 RT5, rh, #64 - 39; \ - veor.64 RT0, 
rh, ra; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rb, ra; \ - vadd.u64 rc, rc, RT1; /* c+=t1; */ \ - veor.64 rg, RT2, RT3; \ - \ - /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \ - /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \ - \ - /**** S0(w[1:2]) */ \ - \ - /* w[0:1] += w[9:10] */ \ - /* RT23q = rw1:rw2 */ \ - vext.u64 RT23q, rw01q, rw23q, #1; \ - vadd.u64 rw0, rw9; \ - vadd.u64 rg, rg, RT0; \ - vadd.u64 rw1, rw10;\ - vadd.u64 rg, rg, RT1; /* g+=t1; */ \ - \ - vshr.u64 RT45q, RT23q, #1; \ - vshl.u64 RT67q, RT23q, #64 - 1; \ - vshr.u64 RT01q, RT23q, #8; \ - veor.u64 RT45q, RT45q, RT67q; \ - vshl.u64 RT67q, RT23q, #64 - 8; \ - veor.u64 RT45q, RT45q, RT01q; \ - vshr.u64 RT01q, RT23q, #7; \ - veor.u64 RT45q, RT45q, RT67q; \ - \ - /**** S1(w[14:15]) */ \ - vshr.u64 RT23q, rw1415q, #6; \ - veor.u64 RT01q, RT01q, RT45q; \ - vshr.u64 RT45q, rw1415q, #19; \ - vshl.u64 RT67q, rw1415q, #64 - 19; \ - veor.u64 RT23q, RT23q, RT45q; \ - vshr.u64 RT45q, rw1415q, #61; \ - veor.u64 RT23q, RT23q, RT67q; \ - vshl.u64 RT67q, rw1415q, #64 - 61; \ - veor.u64 RT23q, RT23q, RT45q; \ - vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \ - veor.u64 RT01q, RT23q, RT67q; -#define vadd_RT01q(rw01q) \ - /* w[0:1] += S(w[14:15]) */ \ - vadd.u64 rw01q, RT01q; - -#define dummy(_) /*_*/ - -#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \ - interleave_op1, arg1, interleave_op2, arg2) \ - /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ - vshr.u64 RT2, re, #14; \ - vshl.u64 RT3, re, #64 - 14; \ - interleave_op1(arg1); \ - vshr.u64 RT4, re, #18; \ - vshl.u64 RT5, re, #64 - 18; \ - interleave_op2(arg2); \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, re, #41; \ - vshl.u64 RT5, re, #64 - 41; \ - vadd.u64 RT0, RT0, rw0; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, re; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, rf, rg; \ - \ - vadd.u64 RT1, RT1, rh; \ - vshr.u64 RT2, ra, #28; \ - vshl.u64 RT3, ra, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ 
- vshr.u64 RT4, ra, #34; \ - vshl.u64 RT5, ra, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* h = Sum0 (a) + Maj (a, b, c); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, ra, #39; \ - vshl.u64 RT5, ra, #64 - 39; \ - veor.64 RT0, ra, rb; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rc, rb; \ - vadd.u64 rd, rd, RT1; /* d+=t1; */ \ - veor.64 rh, RT2, RT3; \ - \ - /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ - vshr.u64 RT2, rd, #14; \ - vshl.u64 RT3, rd, #64 - 14; \ - vadd.u64 rh, rh, RT0; \ - vshr.u64 RT4, rd, #18; \ - vshl.u64 RT5, rd, #64 - 18; \ - vadd.u64 rh, rh, RT1; /* h+=t1; */ \ - vld1.64 {RT0}, [RK]!; \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rd, #41; \ - vshl.u64 RT5, rd, #64 - 41; \ - vadd.u64 RT0, RT0, rw1; \ - veor.64 RT23q, RT23q, RT45q; \ - vmov.64 RT7, rd; \ - veor.64 RT1, RT2, RT3; \ - vbsl.64 RT7, re, rf; \ - \ - vadd.u64 RT1, RT1, rg; \ - vshr.u64 RT2, rh, #28; \ - vshl.u64 RT3, rh, #64 - 28; \ - vadd.u64 RT1, RT1, RT0; \ - vshr.u64 RT4, rh, #34; \ - vshl.u64 RT5, rh, #64 - 34; \ - vadd.u64 RT1, RT1, RT7; \ - \ - /* g = Sum0 (h) + Maj (h, a, b); */ \ - veor.64 RT23q, RT23q, RT45q; \ - vshr.u64 RT4, rh, #39; \ - vshl.u64 RT5, rh, #64 - 39; \ - veor.64 RT0, rh, ra; \ - veor.64 RT23q, RT23q, RT45q; \ - vbsl.64 RT0, rb, ra; \ - vadd.u64 rc, rc, RT1; /* c+=t1; */ \ - veor.64 rg, RT2, RT3; -#define vadd_rg_RT0(rg) \ - vadd.u64 rg, rg, RT0; -#define vadd_rg_RT1(rg) \ - vadd.u64 rg, rg, RT1; /* g+=t1; */ - -.align 3 -ENTRY(sha512_transform_neon) - /* Input: - * %r0: SHA512_CONTEXT - * %r1: data - * %r2: u64 k[] constants - * %r3: nblks - */ - push {%lr}; - - mov %lr, #0; - - /* Load context to d0-d7 */ - vld1.64 {RA-RD}, [%r0]!; - vld1.64 {RE-RH}, [%r0]; - sub %r0, #(4*8); - - /* Load input to w[16], d16-d31 */ - /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. 
*/ - vld1.64 {RW0-RW3}, [%r1]!; - vld1.64 {RW4-RW7}, [%r1]!; - vld1.64 {RW8-RW11}, [%r1]!; - vld1.64 {RW12-RW15}, [%r1]!; -#ifdef __ARMEL__ - /* byteswap */ - vrev64.8 RW01q, RW01q; - vrev64.8 RW23q, RW23q; - vrev64.8 RW45q, RW45q; - vrev64.8 RW67q, RW67q; - vrev64.8 RW89q, RW89q; - vrev64.8 RW1011q, RW1011q; - vrev64.8 RW1213q, RW1213q; - vrev64.8 RW1415q, RW1415q; -#endif - - /* EABI says that d8-d15 must be preserved by callee. */ - /*vpush {RT0-RT7};*/ - -.Loop: - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, - RW23q, RW1415q, RW9, RW10, dummy, _); - b .Lenter_rounds; - -.Loop_rounds: - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, - RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q); -.Lenter_rounds: - rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, - RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q); - rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, - RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q); - rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, - RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); - rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, - RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); - rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, - RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); - add %lr, #16; - rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, - RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); - cmp %lr, #64; - rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, - RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); - bne .Loop_rounds; - - subs %r3, #1; - - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, - vadd_RT01q, RW1415q, dummy, _); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); - beq .Lhandle_tail; - vld1.64 {RW0-RW3}, [%r1]!; - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - 
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); -#ifdef __ARMEL__ - vrev64.8 RW01q, RW01q; - vrev64.8 RW23q, RW23q; -#endif - vld1.64 {RW4-RW7}, [%r1]!; - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, - vadd_rg_RT0, RA, vadd_rg_RT1, RA); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); -#ifdef __ARMEL__ - vrev64.8 RW45q, RW45q; - vrev64.8 RW67q, RW67q; -#endif - vld1.64 {RW8-RW11}, [%r1]!; - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); -#ifdef __ARMEL__ - vrev64.8 RW89q, RW89q; - vrev64.8 RW1011q, RW1011q; -#endif - vld1.64 {RW12-RW15}, [%r1]!; - vadd_rg_RT0(RA); - vadd_rg_RT1(RA); - - /* Load context */ - vld1.64 {RT0-RT3}, [%r0]!; - vld1.64 {RT4-RT7}, [%r0]; - sub %r0, #(4*8); - -#ifdef __ARMEL__ - vrev64.8 RW1213q, RW1213q; - vrev64.8 RW1415q, RW1415q; -#endif - - vadd.u64 RA, RT0; - vadd.u64 RB, RT1; - vadd.u64 RC, RT2; - vadd.u64 RD, RT3; - vadd.u64 RE, RT4; - vadd.u64 RF, RT5; - vadd.u64 RG, RT6; - vadd.u64 RH, RT7; - - /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; - sub RK, $(8*80); - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - mov %lr, #0; - sub %r0, #(4*8); - - b .Loop; - -.Lhandle_tail: - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); - rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, - vadd_rg_RT0, RA, vadd_rg_RT1, RA); - rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, - vadd_rg_RT0, RG, vadd_rg_RT1, RG); - rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, - vadd_rg_RT0, RE, vadd_rg_RT1, RE); - rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, - vadd_rg_RT0, RC, vadd_rg_RT1, RC); - - /* Load context 
to d16-d23 */ - vld1.64 {RW0-RW3}, [%r0]!; - vadd_rg_RT0(RA); - vld1.64 {RW4-RW7}, [%r0]; - vadd_rg_RT1(RA); - sub %r0, #(4*8); - - vadd.u64 RA, RW0; - vadd.u64 RB, RW1; - vadd.u64 RC, RW2; - vadd.u64 RD, RW3; - vadd.u64 RE, RW4; - vadd.u64 RF, RW5; - vadd.u64 RG, RW6; - vadd.u64 RH, RW7; - - /* Store the first half of context */ - vst1.64 {RA-RD}, [%r0]!; - - /* Clear used registers */ - /* d16-d31 */ - veor.u64 RW01q, RW01q; - veor.u64 RW23q, RW23q; - veor.u64 RW45q, RW45q; - veor.u64 RW67q, RW67q; - vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ - veor.u64 RW89q, RW89q; - veor.u64 RW1011q, RW1011q; - veor.u64 RW1213q, RW1213q; - veor.u64 RW1415q, RW1415q; - /* d8-d15 */ - /*vpop {RT0-RT7};*/ - /* d0-d7 (q0-q3) */ - veor.u64 %q0, %q0; - veor.u64 %q1, %q1; - veor.u64 %q2, %q2; - veor.u64 %q3, %q3; - - pop {%pc}; -ENDPROC(sha512_transform_neon) diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped new file mode 100644 index 0000000..3694c4d --- /dev/null +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -0,0 +1,1861 @@ + +@ ==================================================================== +@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA512 block procedure for ARMv4. September 2007. + +@ This code is ~4.5 (four and a half) times faster than code generated +@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +@ Xscale PXA250 core]. +@ +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 6% improvement on +@ Cortex A8 core and ~40 cycles per processed byte. + +@ February 2011. 
+@ +@ Profiler-assisted and platform-specific optimization resulted in 7% +@ improvement on Cortex A8 core and ~38 cycles per byte. + +@ March 2011. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process +@ one byte in 23.3 cycles or ~60% faster than integer-only code. + +@ August 2012. +@ +@ Improve NEON performance by 12% on Snapdragon S4. In absolute +@ terms it's 22.6 cycles per byte, which is a disappointing result. +@ Technical writers asserted that 3-way S4 pipeline can sustain +@ multiple NEON instructions per cycle, but dual NEON issue could +@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html +@ for further details. As a side note, Cortex-A15 processes one byte in +@ 16 cycles. + +@ Byte order [in]dependence. ========================================= +@ +@ Originally the caller was expected to maintain a specific *dword* order in +@ h[0-7], namely with the most significant dword at the *lower* address, which +@ was reflected in the two parameters below as 0 and 4. Now the caller is +@ expected to maintain native byte order for whole 64-bit values. 
+#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + +.text +#if __ARM_ARCH__<7 +.code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + +.type K512,%object +.align 5 +K512: +WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) +WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) +WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) +WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) +WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) +WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) +WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) +WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) +WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) +WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) +WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) +WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) +WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) +WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) +WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) +WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) +WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) +WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) +WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) +WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) +WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) +WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) +WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) +WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) +WORD64(0x19a4c116,0xb8d2d0c8, 
0x1e376c08,0x5141ab53) +WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) +WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) +WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) +WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) +WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) +WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) +WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) +WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) +WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) +WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) +WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) +WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) +WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) +WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) +WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha512_block_data_order +.skip 32-4 +#else +.skip 32 +#endif + +.global sha512_block_data_order +.type sha512_block_data_order,%function +sha512_block_data_order: +#if __ARM_ARCH__<7 + sub r3,pc,#8 @ sha512_block_data_order +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#1 + bne .LNEON +#endif + add r2,r1,r2,lsl#7 @ len to point at the end of inp + stmdb sp!,{r4-r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, 
[sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI 
hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov 
r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check 
in ARM +#endif + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r12,pc} +#else + ldmia sp!,{r4-r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size sha512_block_data_order,.-sha512_block_data_order +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + +.global sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function +.align 4 +sha512_block_data_order_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add r2,r1,r2,lsl#7 @ len to point at the end of inp + VFP_ABI_PUSH + adrl r3,K512 + 
vldmia r0,{d16-d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 
d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ 
sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + 
veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from 
NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 
d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 
q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from 
NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 
d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24-d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16-d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + bx lr @ .word 0xe12fff1e +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon +#endif +.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>" +.align 2 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) +.comm OPENSSL_armcap_P,4,4 +#endif diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c new file mode 100644 index 0000000..269a394 --- /dev/null +++ b/arch/arm/crypto/sha512-glue.c @@ -0,0 +1,121 @@ +/* + * sha512-glue.c - accelerated SHA-384/512 for ARM + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha512_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/hwcap.h> +#include <asm/neon.h> + +#include "sha512.h" + +MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); +MODULE_ALIAS_CRYPTO("sha384-arm"); +MODULE_ALIAS_CRYPTO("sha512-arm"); +/* Block function declared here, defined outside this file; passed (cast to sha512_block_fn *) to the sha512_base helpers below. */ +asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks); +/* .update hook: hands data/len to sha512_base_do_update() with sha512_block_data_order and returns its result. Non-static; declared in sha512.h. */ +int sha512_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); +} +/* .final hook: calls sha512_base_do_finalize(), then returns sha512_base_finish(desc, out). NOTE(review): non-static but has no prototype in sha512.h. */ +int sha512_arm_final(struct shash_desc *desc, u8 *out) +{ + sha512_base_do_finalize(desc, + (sha512_block_fn *)sha512_block_data_order); + return sha512_base_finish(desc, out); +} +/* .finup hook: update with the trailing data, then sha512_arm_final(). The return value of sha512_base_do_update() is not checked. Declared in sha512.h. */ +int sha512_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order); + return sha512_arm_final(desc, out); +} +/* Two shash descriptors, "sha384-arm" then "sha512-arm", both at priority 250; the visible fields differ in .init, .digestsize and the names. */ +static struct shash_alg sha512_arm_algs[] = { { + .init = sha384_base_init, + .update = sha512_arm_update, + .final = sha512_arm_final, + .finup = sha512_arm_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA384_DIGEST_SIZE, + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-arm", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .init = sha512_base_init, + .update = sha512_arm_update, + .final = sha512_arm_final, + .finup = sha512_arm_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA512_DIGEST_SIZE, + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-arm", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + 
.cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; +/* Module init: registers sha512_arm_algs, then sha512_neon_algs when CONFIG_KERNEL_MODE_NEON is enabled and cpu_has_neon() is true. Returns 0 or the registration error. */ +static int __init sha512_arm_mod_init(void) +{ + int err; + + err = crypto_register_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + if (err) + return err; + + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) { + err = crypto_register_shashes(sha512_neon_algs, + ARRAY_SIZE(sha512_neon_algs)); + if (err) + goto err_unregister; + } + return 0; + /* NEON registration failed: undo the sha512_arm_algs registration before returning the error. */ +err_unregister: + crypto_unregister_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + + return err; +} +/* Module exit: unregisters sha512_arm_algs, then sha512_neon_algs under the same condition that gated their registration in sha512_arm_mod_init(). */ +static void __exit sha512_arm_mod_fini(void) +{ + crypto_unregister_shashes(sha512_arm_algs, + ARRAY_SIZE(sha512_arm_algs)); + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) + crypto_unregister_shashes(sha512_neon_algs, + ARRAY_SIZE(sha512_neon_algs)); +} + +module_init(sha512_arm_mod_init); +module_exit(sha512_arm_mod_fini); diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c new file mode 100644 index 0000000..3269368 --- /dev/null +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -0,0 +1,98 @@ +/* + * sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON + * + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/sha512_base.h> +#include <linux/crypto.h> +#include <linux/module.h> + +#include <asm/simd.h> +#include <asm/neon.h> + +#include "sha512.h" + +MODULE_ALIAS_CRYPTO("sha384-neon"); +MODULE_ALIAS_CRYPTO("sha512-neon"); +/* NEON block function (the sha512_block_data_order_neon symbol from the accompanying assembly); passed cast to sha512_block_fn * below. */ +asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src, + int blocks); +/* .update hook for the NEON algs: runs sha512_base_do_update() with the NEON block function inside kernel_neon_begin()/kernel_neon_end(). Returns 0 on that path; the helper result is discarded. */ +static int sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + /* Use sha512_arm_update() when SIMD is unavailable, or when the bytes already counted in the current block plus len stay below one block. */ + if (!may_use_simd() || + (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) + return sha512_arm_update(desc, data, len); + + kernel_neon_begin(); + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order_neon); + kernel_neon_end(); + + return 0; +} +/* .finup hook: falls back to sha512_arm_finup() when may_use_simd() is false; otherwise does the optional update and sha512_base_do_finalize() in one NEON section, then returns sha512_base_finish(). */ +static int sha512_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!may_use_simd()) + return sha512_arm_finup(desc, data, len, out); + + kernel_neon_begin(); + if (len) + sha512_base_do_update(desc, data, len, + (sha512_block_fn *)sha512_block_data_order_neon); + sha512_base_do_finalize(desc, + (sha512_block_fn *)sha512_block_data_order_neon); + kernel_neon_end(); + + return sha512_base_finish(desc, out); +} +/* .final hook: sha512_neon_finup() with no trailing data (NULL, 0). */ +static int sha512_neon_final(struct shash_desc *desc, u8 *out) +{ + return sha512_neon_finup(desc, NULL, 0, out); +} +/* "sha384-neon" and "sha512-neon" descriptors at priority 300. Non-static: declared extern in sha512.h and registered from sha512-glue.c. */ +struct shash_alg sha512_neon_algs[] = { { + .init = sha384_base_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .finup = sha512_neon_finup, + .descsize = sizeof(struct sha512_state), + .digestsize = SHA384_DIGEST_SIZE, + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-neon", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + + } +}, { + .init = sha512_base_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .finup = sha512_neon_finup, + .descsize = 
sizeof(struct sha512_state), + .digestsize = SHA512_DIGEST_SIZE, + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-neon", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; diff --git a/arch/arm/crypto/sha512.h b/arch/arm/crypto/sha512.h new file mode 100644 index 0000000..a75d9a8 --- /dev/null +++ b/arch/arm/crypto/sha512.h @@ -0,0 +1,8 @@ +/* Defined in sha512-glue.c; sha512-neon-glue.c calls it as the fallback path of its .update hook. */ +int sha512_arm_update(struct shash_desc *desc, const u8 *data, + unsigned int len); +/* Defined in sha512-glue.c; sha512-neon-glue.c calls it from its .finup hook when may_use_simd() is false. */ +int sha512_arm_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out); +/* Defined in sha512-neon-glue.c (two entries: sha384, sha512); registered and unregistered by sha512-glue.c. */ +extern struct shash_alg sha512_neon_algs[2]; diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c deleted file mode 100644 index b124dce..0000000 --- a/arch/arm/crypto/sha512_neon_glue.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - * Glue code for the SHA512 Secure Hash Algorithm assembly implementation - * using NEON instructions. - * - * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> - * - * This file is based on sha512_ssse3_glue.c: - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen <tim.c.chen@linux.intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/string.h> -#include <crypto/sha.h> -#include <asm/byteorder.h> -#include <asm/simd.h> -#include <asm/neon.h> - - -static const u64 sha512_k[] = { - 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, - 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, - 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, - 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, - 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, - 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, - 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, - 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, - 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, - 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, - 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, - 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, - 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, - 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, - 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, - 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, - 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, - 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, - 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, - 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, - 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, - 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, - 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, - 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, - 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, - 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, - 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, - 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, - 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, - 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, - 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, - 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, - 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, - 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, - 0x06f067aa72176fbaULL, 
0x0a637dc5a2c898a6ULL, - 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, - 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, - 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, - 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, - 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL -}; - - -asmlinkage void sha512_transform_neon(u64 *digest, const void *data, - const u64 k[], unsigned int num_blks); - - -static int sha512_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = SHA512_H0; - sctx->state[1] = SHA512_H1; - sctx->state[2] = SHA512_H2; - sctx->state[3] = SHA512_H3; - sctx->state[4] = SHA512_H4; - sctx->state[5] = SHA512_H5; - sctx->state[6] = SHA512_H6; - sctx->state[7] = SHA512_H7; - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len, unsigned int partial) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int done = 0; - - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - - if (partial) { - done = SHA512_BLOCK_SIZE - partial; - memcpy(sctx->buf + partial, data, done); - sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); - } - - if (len - done >= SHA512_BLOCK_SIZE) { - const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; - - sha512_transform_neon(sctx->state, data + done, sha512_k, - rounds); - - done += rounds * SHA512_BLOCK_SIZE; - } - - memcpy(sctx->buf, data + done, len - done); - - return 0; -} - -static int sha512_neon_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; - int res; - - /* Handle the fast case right here */ - if (partial + len < SHA512_BLOCK_SIZE) { - sctx->count[0] += len; - if (sctx->count[0] < len) - sctx->count[1]++; - memcpy(sctx->buf + partial, data, len); - - return 0; - } - - if (!may_use_simd()) { - res = 
crypto_sha512_update(desc, data, len); - } else { - kernel_neon_begin(); - res = __sha512_neon_update(desc, data, len, partial); - kernel_neon_end(); - } - - return res; -} - - -/* Add padding and return the message digest. */ -static int sha512_neon_final(struct shash_desc *desc, u8 *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - unsigned int i, index, padlen; - __be64 *dst = (__be64 *)out; - __be64 bits[2]; - static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; - - /* save number of bits */ - bits[1] = cpu_to_be64(sctx->count[0] << 3); - bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); - - /* Pad out to 112 mod 128 and append length */ - index = sctx->count[0] & 0x7f; - padlen = (index < 112) ? (112 - index) : ((128+112) - index); - - if (!may_use_simd()) { - crypto_sha512_update(desc, padding, padlen); - crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); - } else { - kernel_neon_begin(); - /* We need to fill a whole block for __sha512_neon_update() */ - if (padlen <= 112) { - sctx->count[0] += padlen; - if (sctx->count[0] < padlen) - sctx->count[1]++; - memcpy(sctx->buf + index, padding, padlen); - } else { - __sha512_neon_update(desc, padding, padlen, index); - } - __sha512_neon_update(desc, (const u8 *)&bits, - sizeof(bits), 112); - kernel_neon_end(); - } - - /* Store state in digest */ - for (i = 0; i < 8; i++) - dst[i] = cpu_to_be64(sctx->state[i]); - - /* Wipe context */ - memset(sctx, 0, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_export(struct shash_desc *desc, void *out) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha512_neon_import(struct shash_desc *desc, const void *in) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha384_neon_init(struct shash_desc *desc) -{ - struct sha512_state *sctx = shash_desc_ctx(desc); - - sctx->state[0] = 
SHA384_H0; - sctx->state[1] = SHA384_H1; - sctx->state[2] = SHA384_H2; - sctx->state[3] = SHA384_H3; - sctx->state[4] = SHA384_H4; - sctx->state[5] = SHA384_H5; - sctx->state[6] = SHA384_H6; - sctx->state[7] = SHA384_H7; - - sctx->count[0] = sctx->count[1] = 0; - - return 0; -} - -static int sha384_neon_final(struct shash_desc *desc, u8 *hash) -{ - u8 D[SHA512_DIGEST_SIZE]; - - sha512_neon_final(desc, D); - - memcpy(hash, D, SHA384_DIGEST_SIZE); - memzero_explicit(D, SHA512_DIGEST_SIZE); - - return 0; -} - -static struct shash_alg algs[] = { { - .digestsize = SHA512_DIGEST_SIZE, - .init = sha512_neon_init, - .update = sha512_neon_update, - .final = sha512_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha512", - .cra_driver_name = "sha512-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -}, { - .digestsize = SHA384_DIGEST_SIZE, - .init = sha384_neon_init, - .update = sha512_neon_update, - .final = sha384_neon_final, - .export = sha512_neon_export, - .import = sha512_neon_import, - .descsize = sizeof(struct sha512_state), - .statesize = sizeof(struct sha512_state), - .base = { - .cra_name = "sha384", - .cra_driver_name = "sha384-neon", - .cra_priority = 250, - .cra_flags = CRYPTO_ALG_TYPE_SHASH, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_module = THIS_MODULE, - } -} }; - -static int __init sha512_neon_mod_init(void) -{ - if (!cpu_has_neon()) - return -ENODEV; - - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); -} - -static void __exit sha512_neon_mod_fini(void) -{ - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); -} - -module_init(sha512_neon_mod_init); -module_exit(sha512_neon_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated"); - -MODULE_ALIAS_CRYPTO("sha512"); 
-MODULE_ALIAS_CRYPTO("sha384"); diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 6c348df..3303e8a 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -13,7 +13,7 @@ #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/scatterwalk.h> -#include <linux/crypto.h> +#include <crypto/internal/aead.h> #include <linux/module.h> #include "aes-ce-setkey.h" diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index 12dccdb..af4c712 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -69,10 +69,10 @@ static int octeon_md5_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(0x67452301); - mctx->hash[1] = cpu_to_le32(0xefcdab89); - mctx->hash[2] = cpu_to_le32(0x98badcfe); - mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->hash[0] = cpu_to_le32(MD5_H0); + mctx->hash[1] = cpu_to_le32(MD5_H1); + mctx->hash[2] = cpu_to_le32(MD5_H2); + mctx->hash[3] = cpu_to_le32(MD5_H3); mctx->byte_count = 0; return 0; diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c index 452fb4d..9228967 100644 --- a/arch/powerpc/crypto/md5-glue.c +++ b/arch/powerpc/crypto/md5-glue.c @@ -37,10 +37,10 @@ static int ppc_md5_init(struct shash_desc *desc) { struct md5_state *sctx = shash_desc_ctx(desc); - sctx->hash[0] = 0x67452301; - sctx->hash[1] = 0xefcdab89; - sctx->hash[2] = 0x98badcfe; - sctx->hash[3] = 0x10325476; + sctx->hash[0] = MD5_H0; + sctx->hash[1] = MD5_H1; + sctx->hash[2] = MD5_H2; + sctx->hash[3] = MD5_H3; sctx->byte_count = 0; return 0; diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h new file mode 100644 index 0000000..9f8402b --- /dev/null +++ b/arch/powerpc/include/asm/icswx.h @@ -0,0 +1,184 @@ +/* + * ICSWX api + * + * Copyright (C) 2015 IBM Corp. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This provides the Initiate Coprocessor Store Word Indexed (ICSWX) + * instruction. This instruction is used to communicate with PowerPC + * coprocessors. This also provides definitions of the structures used + * to communicate with the coprocessor. + * + * The RFC02130: Coprocessor Architecture document is the reference for + * everything in this file unless otherwise noted. + */ +#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ +#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ + +#include <asm/ppc-opcode.h> /* for PPC_ICSWX */ + +/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */ + +#define CCB_VALUE (0x3fffffffffffffff) +#define CCB_ADDRESS (0xfffffffffffffff8) +#define CCB_CM (0x0000000000000007) +#define CCB_CM0 (0x0000000000000004) +#define CCB_CM12 (0x0000000000000003) + +#define CCB_CM0_ALL_COMPLETIONS (0x0) +#define CCB_CM0_LAST_IN_CHAIN (0x4) +#define CCB_CM12_STORE (0x0) +#define CCB_CM12_INTERRUPT (0x1) + +#define CCB_SIZE (0x10) +#define CCB_ALIGN CCB_SIZE + +struct coprocessor_completion_block { + __be64 value; + __be64 address; +} __packed __aligned(CCB_ALIGN); + + +/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */ + +#define CSB_V (0x80) +#define CSB_F (0x04) +#define CSB_CH (0x03) +#define CSB_CE_INCOMPLETE (0x80) +#define CSB_CE_TERMINATION (0x40) +#define CSB_CE_TPBC (0x20) + +#define CSB_CC_SUCCESS (0) +#define CSB_CC_INVALID_ALIGN (1) +#define CSB_CC_OPERAND_OVERLAP (2) +#define CSB_CC_DATA_LENGTH (3) +#define CSB_CC_TRANSLATION (5) +#define CSB_CC_PROTECTION (6) +#define CSB_CC_RD_EXTERNAL (7) +#define CSB_CC_INVALID_OPERAND (8) +#define CSB_CC_PRIVILEGE (9) +#define CSB_CC_INTERNAL (10) +#define CSB_CC_WR_EXTERNAL (12) +#define CSB_CC_NOSPC (13) +#define CSB_CC_EXCESSIVE_DDE (14) 
+#define CSB_CC_WR_TRANSLATION (15) +#define CSB_CC_WR_PROTECTION (16) +#define CSB_CC_UNKNOWN_CODE (17) +#define CSB_CC_ABORT (18) +#define CSB_CC_TRANSPORT (20) +#define CSB_CC_SEGMENTED_DDL (31) +#define CSB_CC_PROGRESS_POINT (32) +#define CSB_CC_DDE_OVERFLOW (33) +#define CSB_CC_SESSION (34) +#define CSB_CC_PROVISION (36) +#define CSB_CC_CHAIN (37) +#define CSB_CC_SEQUENCE (38) +#define CSB_CC_HW (39) + +#define CSB_SIZE (0x10) +#define CSB_ALIGN CSB_SIZE + +struct coprocessor_status_block { + u8 flags; + u8 cs; + u8 cc; + u8 ce; + __be32 count; + __be64 address; +} __packed __aligned(CSB_ALIGN); + + +/* Chapter 6.5.10 Data-Descriptor List (DDL) + * each list contains one or more Data-Descriptor Entries (DDE) + */ + +#define DDE_P (0x8000) + +#define DDE_SIZE (0x10) +#define DDE_ALIGN DDE_SIZE + +struct data_descriptor_entry { + __be16 flags; + u8 count; + u8 index; + __be32 length; + __be64 address; +} __packed __aligned(DDE_ALIGN); + + +/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */ + +#define CRB_SIZE (0x80) +#define CRB_ALIGN (0x100) /* Errata: requires 256 alignment */ + +/* Coprocessor Status Block field + * ADDRESS address of CSB + * C CCB is valid + * AT 0 = addrs are virtual, 1 = addrs are phys + * M enable perf monitor + */ +#define CRB_CSB_ADDRESS (0xfffffffffffffff0) +#define CRB_CSB_C (0x0000000000000008) +#define CRB_CSB_AT (0x0000000000000002) +#define CRB_CSB_M (0x0000000000000001) + +struct coprocessor_request_block { + __be32 ccw; + __be32 flags; + __be64 csb_addr; + + struct data_descriptor_entry source; + struct data_descriptor_entry target; + + struct coprocessor_completion_block ccb; + + u8 reserved[48]; + + struct coprocessor_status_block csb; +} __packed __aligned(CRB_ALIGN); + + +/* RFC02167 Initiate Coprocessor Instructions document + * Chapter 8.2.1.1.1 RS + * Chapter 8.2.3 Coprocessor Directive + * Chapter 8.2.4 Execution + * + * The CCW must be converted to BE before passing to icswx() + */ + +#define CCW_PS (0xff000000) 
+#define CCW_CT (0x00ff0000) +#define CCW_CD (0x0000ffff) +#define CCW_CL (0x0000c000) + + +/* RFC02167 Initiate Coprocessor Instructions document + * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX) + * Chapter 8.2.4.1 Condition Register 0 + */ + +#define ICSWX_INITIATED (0x8) +#define ICSWX_BUSY (0x4) +#define ICSWX_REJECTED (0x2) + +static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb) +{ + __be64 ccw_reg = ccw; + u32 cr; + + __asm__ __volatile__( + PPC_ICSWX(%1,0,%2) "\n" + "mfcr %0\n" + : "=r" (cr) + : "r" (ccw_reg), "r" (crb) + : "cr0", "memory"); + + return (int)((cr >> 28) & 0xf); +} + + +#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */ diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 5c93f69..8452335 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -136,6 +136,8 @@ #define PPC_INST_DCBAL 0x7c2005ec #define PPC_INST_DCBZL 0x7c2007ec #define PPC_INST_ICBT 0x7c00002c +#define PPC_INST_ICSWX 0x7c00032d +#define PPC_INST_ICSWEPX 0x7c00076d #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 @@ -403,4 +405,15 @@ #define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \ TMRN(tmr) | ___PPC_RT(r)) +/* Coprocessor instructions */ +#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_INST_ICSWX | \ + ___PPC_RS(s) | \ + ___PPC_RA(a) | \ + ___PPC_RB(b)) +#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_INST_ICSWEPX | \ + ___PPC_RS(s) | \ + ___PPC_RA(a) | \ + ___PPC_RB(b)) + + #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 308c5e1..ea2cea7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np) } return -1; } +EXPORT_SYMBOL(of_get_ibm_chip_id); /** * cpu_to_chip_id - Return the cpus chip-id diff --git a/arch/sparc/crypto/md5_glue.c 
b/arch/sparc/crypto/md5_glue.c index b688731..c9d2b92 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -33,10 +33,10 @@ static int md5_sparc64_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(0x67452301); - mctx->hash[1] = cpu_to_le32(0xefcdab89); - mctx->hash[2] = cpu_to_le32(0x98badcfe); - mctx->hash[3] = cpu_to_le32(0x10325476); + mctx->hash[0] = cpu_to_le32(MD5_H0); + mctx->hash[1] = cpu_to_le32(MD5_H1); + mctx->hash[2] = cpu_to_le32(MD5_H2); + mctx->hash[3] = cpu_to_le32(MD5_H3); mctx->byte_count = 0; return 0; diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefa..2ade2400 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -807,8 +807,9 @@ static int rfc4106_init(struct crypto_tfm *tfm) child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child); memcpy(child_ctx, ctx, sizeof(*ctx)); ctx->cryptd_tfm = cryptd_tfm; - tfm->crt_aead.reqsize = sizeof(struct aead_request) - + crypto_aead_reqsize(&cryptd_tfm->base); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_request) + + crypto_aead_reqsize(&cryptd_tfm->base)); return 0; } diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index e510b1c..0cb5149 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -885,7 +885,8 @@ static int __init sha1_mb_mod_init(void) INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); cpu_state->cpu = cpu; cpu_state->alg_state = &sha1_mb_alg_state; - cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL); + cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr), + GFP_KERNEL); if (!cpu_state->mgr) goto err2; sha1_ctx_mgr_init(cpu_state->mgr); diff --git a/crypto/842.c b/crypto/842.c index b48f4f1..98e387e 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -1,5 +1,5 @@ /* - * Cryptographic API for the 842 
compression algorithm. + * Cryptographic API for the 842 software compression algorithm. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -11,173 +11,73 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * Copyright (C) IBM Corporation, 2011-2015 * - * Copyright (C) IBM Corporation, 2011 + * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> * - * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> - * Seth Jennings <sjenning@linux.vnet.ibm.com> + * Rewrite: Dan Streetman <ddstreet@ieee.org> + * + * This is the software implementation of compression and decompression using + * the 842 format. This uses the software 842 library at lib/842/ which is + * only a reference implementation, and is very, very slow as compared to other + * software compressors. You probably do not want to use this software + * compression. 
If you have access to the PowerPC 842 compression hardware, you + * want to use the 842 hardware compression interface, which is at: + * drivers/crypto/nx/nx-842-crypto.c */ #include <linux/init.h> #include <linux/module.h> #include <linux/crypto.h> -#include <linux/vmalloc.h> -#include <linux/nx842.h> -#include <linux/lzo.h> -#include <linux/timer.h> - -static int nx842_uselzo; - -struct nx842_ctx { - void *nx842_wmem; /* working memory for 842/lzo */ -}; +#include <linux/sw842.h> -enum nx842_crypto_type { - NX842_CRYPTO_TYPE_842, - NX842_CRYPTO_TYPE_LZO +struct crypto842_ctx { + char wmem[SW842_MEM_COMPRESS]; /* working memory for compress */ }; -#define NX842_SENTINEL 0xdeadbeef - -struct nx842_crypto_header { - unsigned int sentinel; /* debug */ - enum nx842_crypto_type type; -}; - -static int nx842_init(struct crypto_tfm *tfm) -{ - struct nx842_ctx *ctx = crypto_tfm_ctx(tfm); - int wmemsize; - - wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS); - ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS); - if (!ctx->nx842_wmem) - return -ENOMEM; - - return 0; -} - -static void nx842_exit(struct crypto_tfm *tfm) -{ - struct nx842_ctx *ctx = crypto_tfm_ctx(tfm); - - kfree(ctx->nx842_wmem); -} - -static void nx842_reset_uselzo(unsigned long data) +static int crypto842_compress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) { - nx842_uselzo = 0; -} - -static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0); - -static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) -{ - struct nx842_ctx *ctx = crypto_tfm_ctx(tfm); - struct nx842_crypto_header *hdr; - unsigned int tmp_len = *dlen; - size_t lzodlen; /* needed for lzo */ - int err; - - *dlen = 0; - hdr = (struct nx842_crypto_header *)dst; - hdr->sentinel = NX842_SENTINEL; /* debug */ - dst += sizeof(struct nx842_crypto_header); - tmp_len -= sizeof(struct nx842_crypto_header); - lzodlen = tmp_len; 
- - if (likely(!nx842_uselzo)) { - err = nx842_compress(src, slen, dst, &tmp_len, ctx->nx842_wmem); - - if (likely(!err)) { - hdr->type = NX842_CRYPTO_TYPE_842; - *dlen = tmp_len + sizeof(struct nx842_crypto_header); - return 0; - } - - /* hardware failed */ - nx842_uselzo = 1; + struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm); - /* set timer to check for hardware again in 1 second */ - mod_timer(&failover_timer, jiffies + msecs_to_jiffies(1000)); - } - - /* no hardware, use lzo */ - err = lzo1x_1_compress(src, slen, dst, &lzodlen, ctx->nx842_wmem); - if (err != LZO_E_OK) - return -EINVAL; - - hdr->type = NX842_CRYPTO_TYPE_LZO; - *dlen = lzodlen + sizeof(struct nx842_crypto_header); - return 0; + return sw842_compress(src, slen, dst, dlen, ctx->wmem); } -static int nx842_crypto_decompress(struct crypto_tfm *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen) +static int crypto842_decompress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) { - struct nx842_ctx *ctx = crypto_tfm_ctx(tfm); - struct nx842_crypto_header *hdr; - unsigned int tmp_len = *dlen; - size_t lzodlen; /* needed for lzo */ - int err; - - *dlen = 0; - hdr = (struct nx842_crypto_header *)src; - - if (unlikely(hdr->sentinel != NX842_SENTINEL)) - return -EINVAL; - - src += sizeof(struct nx842_crypto_header); - slen -= sizeof(struct nx842_crypto_header); - - if (likely(hdr->type == NX842_CRYPTO_TYPE_842)) { - err = nx842_decompress(src, slen, dst, &tmp_len, - ctx->nx842_wmem); - if (err) - return -EINVAL; - *dlen = tmp_len; - } else if (hdr->type == NX842_CRYPTO_TYPE_LZO) { - lzodlen = tmp_len; - err = lzo1x_decompress_safe(src, slen, dst, &lzodlen); - if (err != LZO_E_OK) - return -EINVAL; - *dlen = lzodlen; - } else - return -EINVAL; - - return 0; + return sw842_decompress(src, slen, dst, dlen); } static struct crypto_alg alg = { .cra_name = "842", + .cra_driver_name = "842-generic", + .cra_priority = 100, .cra_flags = 
CRYPTO_ALG_TYPE_COMPRESS, - .cra_ctxsize = sizeof(struct nx842_ctx), + .cra_ctxsize = sizeof(struct crypto842_ctx), .cra_module = THIS_MODULE, - .cra_init = nx842_init, - .cra_exit = nx842_exit, .cra_u = { .compress = { - .coa_compress = nx842_crypto_compress, - .coa_decompress = nx842_crypto_decompress } } + .coa_compress = crypto842_compress, + .coa_decompress = crypto842_decompress } } }; -static int __init nx842_mod_init(void) +static int __init crypto842_mod_init(void) { - del_timer(&failover_timer); return crypto_register_alg(&alg); } +module_init(crypto842_mod_init); -static void __exit nx842_mod_exit(void) +static void __exit crypto842_mod_exit(void) { crypto_unregister_alg(&alg); } - -module_init(nx842_mod_init); -module_exit(nx842_mod_exit); +module_exit(crypto842_mod_exit); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("842 Compression Algorithm"); +MODULE_DESCRIPTION("842 Software Compression Algorithm"); MODULE_ALIAS_CRYPTO("842"); +MODULE_ALIAS_CRYPTO("842-generic"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/crypto/Kconfig b/crypto/Kconfig index 362905e..0ff4cd4 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -221,11 +221,22 @@ config CRYPTO_SEQIV tristate "Sequence Number IV Generator" select CRYPTO_AEAD select CRYPTO_BLKCIPHER + select CRYPTO_NULL select CRYPTO_RNG help This IV generator generates an IV based on a sequence number by xoring it with a salt. This algorithm is mainly useful for CTR +config CRYPTO_ECHAINIV + tristate "Encrypted Chain IV Generator" + select CRYPTO_AEAD + select CRYPTO_NULL + select CRYPTO_RNG + help + This IV generator generates an IV based on the encryption of + a sequence number xored with a salt. This is the default + algorithm for CBC. 
+ comment "Block modes" config CRYPTO_CBC @@ -1412,10 +1423,9 @@ config CRYPTO_LZO config CRYPTO_842 tristate "842 compression algorithm" - depends on CRYPTO_DEV_NX_COMPRESS - # 842 uses lzo if the hardware becomes unavailable - select LZO_COMPRESS - select LZO_DECOMPRESS + select CRYPTO_ALGAPI + select 842_COMPRESS + select 842_DECOMPRESS help This is the 842 algorithm. @@ -1479,9 +1489,19 @@ config CRYPTO_DRBG tristate default CRYPTO_DRBG_MENU if (CRYPTO_DRBG_HMAC || CRYPTO_DRBG_HASH || CRYPTO_DRBG_CTR) select CRYPTO_RNG + select CRYPTO_JITTERENTROPY endif # if CRYPTO_DRBG_MENU +config CRYPTO_JITTERENTROPY + tristate "Jitterentropy Non-Deterministic Random Number Generator" + help + The Jitterentropy RNG is a noise that is intended + to provide seed to another RNG. The RNG does not + perform any cryptographic whitening of the generated + random numbers. This Jitterentropy RNG registers with + the kernel crypto API and can be used by any caller. + config CRYPTO_USER_API tristate diff --git a/crypto/Makefile b/crypto/Makefile index 97b7d3a..5db5b95 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o +obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o crypto_hash-y += ahash.o crypto_hash-y += shash.o @@ -94,6 +95,8 @@ obj-$(CONFIG_CRYPTO_RNG2) += rng.o obj-$(CONFIG_CRYPTO_RNG2) += krng.o obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o obj-$(CONFIG_CRYPTO_DRBG) += drbg.o +CFLAGS_jitterentropy.o = -O0 +obj-$(CONFIG_CRYPTO_JITTERENTROPY) += jitterentropy.o obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index db201bca..b15d797 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -586,6 +586,13 @@ static int crypto_givcipher_default(struct 
crypto_alg *alg, u32 type, u32 mask) if (!tmpl) goto kill_larval; + if (tmpl->create) { + err = tmpl->create(tmpl, tb); + if (err) + goto put_tmpl; + goto ok; + } + inst = tmpl->alloc(tb); err = PTR_ERR(inst); if (IS_ERR(inst)) @@ -597,6 +604,7 @@ static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask) goto put_tmpl; } +ok: /* Redo the lookup to use the instance we just registered. */ err = -EAGAIN; @@ -636,7 +644,7 @@ struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask) if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_GIVCIPHER) { - if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) { + if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) { crypto_mod_put(alg); alg = ERR_PTR(-ENOENT); } diff --git a/crypto/aead.c b/crypto/aead.c index 2222710..7c3d725 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -13,6 +13,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/scatterwalk.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -26,10 +27,12 @@ #include "internal.h" +static int aead_null_givencrypt(struct aead_givcrypt_request *req); +static int aead_null_givdecrypt(struct aead_givcrypt_request *req); + static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { - struct aead_alg *aead = crypto_aead_alg(tfm); unsigned long alignmask = crypto_aead_alignmask(tfm); int ret; u8 *buffer, *alignbuffer; @@ -42,47 +45,94 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key, alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); memcpy(alignbuffer, key, keylen); - ret = aead->setkey(tfm, alignbuffer, keylen); + ret = tfm->setkey(tfm, alignbuffer, keylen); memset(alignbuffer, 0, keylen); kfree(buffer); return ret; } -static int setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) +int crypto_aead_setkey(struct crypto_aead *tfm, + const u8 *key, unsigned int keylen) { - struct aead_alg *aead = 
crypto_aead_alg(tfm); unsigned long alignmask = crypto_aead_alignmask(tfm); + tfm = tfm->child; + if ((unsigned long)key & alignmask) return setkey_unaligned(tfm, key, keylen); - return aead->setkey(tfm, key, keylen); + return tfm->setkey(tfm, key, keylen); } +EXPORT_SYMBOL_GPL(crypto_aead_setkey); int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - struct aead_tfm *crt = crypto_aead_crt(tfm); int err; - if (authsize > crypto_aead_alg(tfm)->maxauthsize) + if (authsize > crypto_aead_maxauthsize(tfm)) return -EINVAL; - if (crypto_aead_alg(tfm)->setauthsize) { - err = crypto_aead_alg(tfm)->setauthsize(crt->base, authsize); + if (tfm->setauthsize) { + err = tfm->setauthsize(tfm->child, authsize); if (err) return err; } - crypto_aead_crt(crt->base)->authsize = authsize; - crt->authsize = authsize; + tfm->child->authsize = authsize; + tfm->authsize = authsize; return 0; } EXPORT_SYMBOL_GPL(crypto_aead_setauthsize); -static unsigned int crypto_aead_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) +struct aead_old_request { + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct aead_request subreq; +}; + +unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) +{ + return tfm->reqsize + sizeof(struct aead_old_request); +} +EXPORT_SYMBOL_GPL(crypto_aead_reqsize); + +static int old_crypt(struct aead_request *req, + int (*crypt)(struct aead_request *req)) { - return alg->cra_ctxsize; + struct aead_old_request *nreq = aead_request_ctx(req); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct scatterlist *src, *dst; + + if (req->old) + return crypt(req); + + src = scatterwalk_ffwd(nreq->srcbuf, req->src, req->assoclen); + dst = scatterwalk_ffwd(nreq->dstbuf, req->dst, req->assoclen); + + aead_request_set_tfm(&nreq->subreq, aead); + aead_request_set_callback(&nreq->subreq, aead_request_flags(req), + req->base.complete, req->base.data); + aead_request_set_crypt(&nreq->subreq, src, dst, req->cryptlen, + req->iv); + 
aead_request_set_assoc(&nreq->subreq, req->src, req->assoclen); + + return crypt(&nreq->subreq); +} + +static int old_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct old_aead_alg *alg = crypto_old_aead_alg(aead); + + return old_crypt(req, alg->encrypt); +} + +static int old_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct old_aead_alg *alg = crypto_old_aead_alg(aead); + + return old_crypt(req, alg->decrypt); } static int no_givcrypt(struct aead_givcrypt_request *req) @@ -90,32 +140,54 @@ static int no_givcrypt(struct aead_givcrypt_request *req) return -ENOSYS; } -static int crypto_init_aead_ops(struct crypto_tfm *tfm, u32 type, u32 mask) +static int crypto_old_aead_init_tfm(struct crypto_tfm *tfm) { - struct aead_alg *alg = &tfm->__crt_alg->cra_aead; - struct aead_tfm *crt = &tfm->crt_aead; + struct old_aead_alg *alg = &tfm->__crt_alg->cra_aead; + struct crypto_aead *crt = __crypto_aead_cast(tfm); if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8) return -EINVAL; - crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ? 
- alg->setkey : setkey; - crt->encrypt = alg->encrypt; - crt->decrypt = alg->decrypt; - crt->givencrypt = alg->givencrypt ?: no_givcrypt; - crt->givdecrypt = alg->givdecrypt ?: no_givcrypt; - crt->base = __crypto_aead_cast(tfm); - crt->ivsize = alg->ivsize; + crt->setkey = alg->setkey; + crt->setauthsize = alg->setauthsize; + crt->encrypt = old_encrypt; + crt->decrypt = old_decrypt; + if (alg->ivsize) { + crt->givencrypt = alg->givencrypt ?: no_givcrypt; + crt->givdecrypt = alg->givdecrypt ?: no_givcrypt; + } else { + crt->givencrypt = aead_null_givencrypt; + crt->givdecrypt = aead_null_givdecrypt; + } + crt->child = __crypto_aead_cast(tfm); crt->authsize = alg->maxauthsize; return 0; } +static int crypto_aead_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_aead *aead = __crypto_aead_cast(tfm); + struct aead_alg *alg = crypto_aead_alg(aead); + + if (crypto_old_aead_alg(aead)->encrypt) + return crypto_old_aead_init_tfm(tfm); + + aead->setkey = alg->setkey; + aead->setauthsize = alg->setauthsize; + aead->encrypt = alg->encrypt; + aead->decrypt = alg->decrypt; + aead->child = __crypto_aead_cast(tfm); + aead->authsize = alg->maxauthsize; + + return 0; +} + #ifdef CONFIG_NET -static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) +static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_aead raead; - struct aead_alg *aead = &alg->cra_aead; + struct old_aead_alg *aead = &alg->cra_aead; strncpy(raead.type, "aead", sizeof(raead.type)); strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv)); @@ -133,6 +205,64 @@ nla_put_failure: return -EMSGSIZE; } #else +static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + return -ENOSYS; +} +#endif + +static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg) + __attribute__ ((unused)); +static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg) +{ + struct old_aead_alg *aead = 
&alg->cra_aead; + + seq_printf(m, "type : aead\n"); + seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? + "yes" : "no"); + seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); + seq_printf(m, "ivsize : %u\n", aead->ivsize); + seq_printf(m, "maxauthsize : %u\n", aead->maxauthsize); + seq_printf(m, "geniv : %s\n", aead->geniv ?: "<built-in>"); +} + +const struct crypto_type crypto_aead_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_aead_init_tfm, +#ifdef CONFIG_PROC_FS + .show = crypto_old_aead_show, +#endif + .report = crypto_old_aead_report, + .lookup = crypto_lookup_aead, + .maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV), + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_AEAD, + .tfmsize = offsetof(struct crypto_aead, base), +}; +EXPORT_SYMBOL_GPL(crypto_aead_type); + +#ifdef CONFIG_NET +static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) +{ + struct crypto_report_aead raead; + struct aead_alg *aead = container_of(alg, struct aead_alg, base); + + strncpy(raead.type, "aead", sizeof(raead.type)); + strncpy(raead.geniv, "<none>", sizeof(raead.geniv)); + + raead.blocksize = alg->cra_blocksize; + raead.maxauthsize = aead->maxauthsize; + raead.ivsize = aead->ivsize; + + if (nla_put(skb, CRYPTOCFGA_REPORT_AEAD, + sizeof(struct crypto_report_aead), &raead)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) { return -ENOSYS; @@ -143,7 +273,7 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) __attribute__ ((unused)); static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) { - struct aead_alg *aead = &alg->cra_aead; + struct aead_alg *aead = container_of(alg, struct aead_alg, base); seq_printf(m, "type : aead\n"); seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? 
@@ -151,18 +281,21 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "ivsize : %u\n", aead->ivsize); seq_printf(m, "maxauthsize : %u\n", aead->maxauthsize); - seq_printf(m, "geniv : %s\n", aead->geniv ?: "<built-in>"); + seq_printf(m, "geniv : <none>\n"); } -const struct crypto_type crypto_aead_type = { - .ctxsize = crypto_aead_ctxsize, - .init = crypto_init_aead_ops, +static const struct crypto_type crypto_new_aead_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_aead_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_aead_show, #endif .report = crypto_aead_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_AEAD, + .tfmsize = offsetof(struct crypto_aead, base), }; -EXPORT_SYMBOL_GPL(crypto_aead_type); static int aead_null_givencrypt(struct aead_givcrypt_request *req) { @@ -174,33 +307,11 @@ static int aead_null_givdecrypt(struct aead_givcrypt_request *req) return crypto_aead_decrypt(&req->areq); } -static int crypto_init_nivaead_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct aead_alg *alg = &tfm->__crt_alg->cra_aead; - struct aead_tfm *crt = &tfm->crt_aead; - - if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8) - return -EINVAL; - - crt->setkey = setkey; - crt->encrypt = alg->encrypt; - crt->decrypt = alg->decrypt; - if (!alg->ivsize) { - crt->givencrypt = aead_null_givencrypt; - crt->givdecrypt = aead_null_givdecrypt; - } - crt->base = __crypto_aead_cast(tfm); - crt->ivsize = alg->ivsize; - crt->authsize = alg->maxauthsize; - - return 0; -} - #ifdef CONFIG_NET static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_aead raead; - struct aead_alg *aead = &alg->cra_aead; + struct old_aead_alg *aead = &alg->cra_aead; strncpy(raead.type, "nivaead", sizeof(raead.type)); strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv)); @@ -229,7 +340,7 @@ 
static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg) __attribute__ ((unused)); static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg) { - struct aead_alg *aead = &alg->cra_aead; + struct old_aead_alg *aead = &alg->cra_aead; seq_printf(m, "type : nivaead\n"); seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? @@ -241,43 +352,36 @@ static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg) } const struct crypto_type crypto_nivaead_type = { - .ctxsize = crypto_aead_ctxsize, - .init = crypto_init_nivaead_ops, + .extsize = crypto_alg_extsize, + .init_tfm = crypto_aead_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_nivaead_show, #endif .report = crypto_nivaead_report, + .maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV), + .maskset = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV, + .type = CRYPTO_ALG_TYPE_AEAD, + .tfmsize = offsetof(struct crypto_aead, base), }; EXPORT_SYMBOL_GPL(crypto_nivaead_type); static int crypto_grab_nivaead(struct crypto_aead_spawn *spawn, const char *name, u32 type, u32 mask) { - struct crypto_alg *alg; - int err; - - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - type |= CRYPTO_ALG_TYPE_AEAD; - mask |= CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV; - - alg = crypto_alg_mod_lookup(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); - - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; + spawn->base.frontend = &crypto_nivaead_type; + return crypto_grab_spawn(&spawn->base, name, type, mask); } -struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl, - struct rtattr **tb, u32 type, - u32 mask) +struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, + struct rtattr **tb, u32 type, u32 mask) { const char *name; struct crypto_aead_spawn *spawn; struct crypto_attr_type *algt; - struct crypto_instance *inst; - struct crypto_alg *alg; + struct aead_instance *inst; + struct aead_alg *alg; 
+ unsigned int ivsize; + unsigned int maxauthsize; int err; algt = crypto_get_attr_type(tb); @@ -296,20 +400,23 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl, if (!inst) return ERR_PTR(-ENOMEM); - spawn = crypto_instance_ctx(inst); + spawn = aead_instance_ctx(inst); /* Ignore async algorithms if necessary. */ mask |= crypto_requires_sync(algt->type, algt->mask); - crypto_set_aead_spawn(spawn, inst); + crypto_set_aead_spawn(spawn, aead_crypto_instance(inst)); err = crypto_grab_nivaead(spawn, name, type, mask); if (err) goto err_free_inst; - alg = crypto_aead_spawn_alg(spawn); + alg = crypto_spawn_aead_alg(spawn); + + ivsize = crypto_aead_alg_ivsize(alg); + maxauthsize = crypto_aead_alg_maxauthsize(alg); err = -EINVAL; - if (!alg->cra_aead.ivsize) + if (!ivsize) goto err_drop_alg; /* @@ -318,39 +425,54 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl, * template name and double-check the IV generator. */ if (algt->mask & CRYPTO_ALG_GENIV) { - if (strcmp(tmpl->name, alg->cra_aead.geniv)) + if (!alg->base.cra_aead.encrypt) + goto err_drop_alg; + if (strcmp(tmpl->name, alg->base.cra_aead.geniv)) goto err_drop_alg; - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - memcpy(inst->alg.cra_driver_name, alg->cra_driver_name, + memcpy(inst->alg.base.cra_name, alg->base.cra_name, CRYPTO_MAX_ALG_NAME); - } else { - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "%s(%s)", tmpl->name, alg->cra_driver_name) >= - CRYPTO_MAX_ALG_NAME) - goto err_drop_alg; + memcpy(inst->alg.base.cra_driver_name, + alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME); + + inst->alg.base.cra_flags = CRYPTO_ALG_TYPE_AEAD | + CRYPTO_ALG_GENIV; + inst->alg.base.cra_flags |= alg->base.cra_flags & + CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = 
alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; + inst->alg.base.cra_type = &crypto_aead_type; + + inst->alg.base.cra_aead.ivsize = ivsize; + inst->alg.base.cra_aead.maxauthsize = maxauthsize; + + inst->alg.base.cra_aead.setkey = alg->base.cra_aead.setkey; + inst->alg.base.cra_aead.setauthsize = + alg->base.cra_aead.setauthsize; + inst->alg.base.cra_aead.encrypt = alg->base.cra_aead.encrypt; + inst->alg.base.cra_aead.decrypt = alg->base.cra_aead.decrypt; + + goto out; } - inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV; - inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; - inst->alg.cra_priority = alg->cra_priority; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - inst->alg.cra_type = &crypto_aead_type; + err = -ENAMETOOLONG; + if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, + "%s(%s)", tmpl->name, alg->base.cra_name) >= + CRYPTO_MAX_ALG_NAME) + goto err_drop_alg; + if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "%s(%s)", tmpl->name, alg->base.cra_driver_name) >= + CRYPTO_MAX_ALG_NAME) + goto err_drop_alg; - inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize; - inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize; - inst->alg.cra_aead.geniv = alg->cra_aead.geniv; + inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; + inst->alg.base.cra_priority = alg->base.cra_priority; + inst->alg.base.cra_blocksize = alg->base.cra_blocksize; + inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - inst->alg.cra_aead.setkey = alg->cra_aead.setkey; - inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize; - inst->alg.cra_aead.encrypt = alg->cra_aead.encrypt; - inst->alg.cra_aead.decrypt = alg->cra_aead.decrypt; + inst->alg.ivsize = ivsize; + inst->alg.maxauthsize = maxauthsize; out: return inst; @@ -364,9 +486,9 @@ err_free_inst: } 
EXPORT_SYMBOL_GPL(aead_geniv_alloc); -void aead_geniv_free(struct crypto_instance *inst) +void aead_geniv_free(struct aead_instance *inst) { - crypto_drop_aead(crypto_instance_ctx(inst)); + crypto_drop_aead(aead_instance_ctx(inst)); kfree(inst); } EXPORT_SYMBOL_GPL(aead_geniv_free); @@ -374,14 +496,17 @@ EXPORT_SYMBOL_GPL(aead_geniv_free); int aead_geniv_init(struct crypto_tfm *tfm) { struct crypto_instance *inst = (void *)tfm->__crt_alg; + struct crypto_aead *child; struct crypto_aead *aead; - aead = crypto_spawn_aead(crypto_instance_ctx(inst)); - if (IS_ERR(aead)) - return PTR_ERR(aead); + aead = __crypto_aead_cast(tfm); + + child = crypto_spawn_aead(crypto_instance_ctx(inst)); + if (IS_ERR(child)) + return PTR_ERR(child); - tfm->crt_aead.base = aead; - tfm->crt_aead.reqsize += crypto_aead_reqsize(aead); + aead->child = child; + aead->reqsize += crypto_aead_reqsize(child); return 0; } @@ -389,7 +514,7 @@ EXPORT_SYMBOL_GPL(aead_geniv_init); void aead_geniv_exit(struct crypto_tfm *tfm) { - crypto_free_aead(tfm->crt_aead.base); + crypto_free_aead(__crypto_aead_cast(tfm)->child); } EXPORT_SYMBOL_GPL(aead_geniv_exit); @@ -443,6 +568,13 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask) if (!tmpl) goto kill_larval; + if (tmpl->create) { + err = tmpl->create(tmpl, tb); + if (err) + goto put_tmpl; + goto ok; + } + inst = tmpl->alloc(tb); err = PTR_ERR(inst); if (IS_ERR(inst)) @@ -454,6 +586,7 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask) goto put_tmpl; } +ok: /* Redo the lookup to use the instance we just registered. 
*/ err = -EAGAIN; @@ -489,7 +622,7 @@ struct crypto_alg *crypto_lookup_aead(const char *name, u32 type, u32 mask) return alg; if (alg->cra_type == &crypto_aead_type) { - if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) { + if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) { crypto_mod_put(alg); alg = ERR_PTR(-ENOENT); } @@ -505,62 +638,62 @@ EXPORT_SYMBOL_GPL(crypto_lookup_aead); int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name, u32 type, u32 mask) { - struct crypto_alg *alg; - int err; + spawn->base.frontend = &crypto_aead_type; + return crypto_grab_spawn(&spawn->base, name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_grab_aead); - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - type |= CRYPTO_ALG_TYPE_AEAD; - mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - mask |= CRYPTO_ALG_TYPE_MASK; +struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_aead_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_aead); - alg = crypto_lookup_aead(name, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); +static int aead_prepare_alg(struct aead_alg *alg) +{ + struct crypto_alg *base = &alg->base; - err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask); - crypto_mod_put(alg); - return err; + if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8) + return -EINVAL; + + base->cra_type = &crypto_new_aead_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_AEAD; + + return 0; } -EXPORT_SYMBOL_GPL(crypto_grab_aead); -struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask) +int crypto_register_aead(struct aead_alg *alg) { - struct crypto_tfm *tfm; + struct crypto_alg *base = &alg->base; int err; - type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - type |= CRYPTO_ALG_TYPE_AEAD; - mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); - mask |= CRYPTO_ALG_TYPE_MASK; - - for (;;) { - struct 
crypto_alg *alg; + err = aead_prepare_alg(alg); + if (err) + return err; - alg = crypto_lookup_aead(alg_name, type, mask); - if (IS_ERR(alg)) { - err = PTR_ERR(alg); - goto err; - } + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_aead); - tfm = __crypto_alloc_tfm(alg, type, mask); - if (!IS_ERR(tfm)) - return __crypto_aead_cast(tfm); +int crypto_unregister_aead(struct aead_alg *alg) +{ + return crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_aead); - crypto_mod_put(alg); - err = PTR_ERR(tfm); +int aead_register_instance(struct crypto_template *tmpl, + struct aead_instance *inst) +{ + int err; -err: - if (err != -EAGAIN) - break; - if (signal_pending(current)) { - err = -EINTR; - break; - } - } + err = aead_prepare_alg(&inst->alg); + if (err) + return err; - return ERR_PTR(err); + return crypto_register_instance(tmpl, aead_crypto_instance(inst)); } -EXPORT_SYMBOL_GPL(crypto_alloc_aead); +EXPORT_SYMBOL_GPL(aead_register_instance); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Authenticated Encryption with Associated Data (AEAD)"); diff --git a/crypto/algapi.c b/crypto/algapi.c index d2627a3..abf100c 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -12,6 +12,7 @@ #include <linux/err.h> #include <linux/errno.h> +#include <linux/fips.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/list.h> @@ -43,12 +44,9 @@ static inline int crypto_set_driver_name(struct crypto_alg *alg) static inline void crypto_check_module_sig(struct module *mod) { -#ifdef CONFIG_CRYPTO_FIPS - if (fips_enabled && mod && !mod->sig_ok) + if (fips_enabled && mod && !module_sig_ok(mod)) panic("Module %s signature verification failed in FIPS mode\n", - mod->name); -#endif - return; + module_name(mod)); } static int crypto_check_alg(struct crypto_alg *alg) @@ -614,6 +612,22 @@ out: } EXPORT_SYMBOL_GPL(crypto_init_spawn2); +int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name, + u32 type, u32 mask) +{ + struct 
crypto_alg *alg; + int err; + + alg = crypto_find_alg(name, spawn->frontend, type, mask); + if (IS_ERR(alg)) + return PTR_ERR(alg); + + err = crypto_init_spawn(spawn, alg, spawn->inst, mask); + crypto_mod_put(alg); + return err; +} +EXPORT_SYMBOL_GPL(crypto_grab_spawn); + void crypto_drop_spawn(struct crypto_spawn *spawn) { if (!spawn->alg) @@ -964,6 +978,12 @@ void crypto_xor(u8 *dst, const u8 *src, unsigned int size) } EXPORT_SYMBOL_GPL(crypto_xor); +unsigned int crypto_alg_extsize(struct crypto_alg *alg) +{ + return alg->cra_ctxsize; +} +EXPORT_SYMBOL_GPL(crypto_alg_extsize); + static int __init crypto_algapi_init(void) { crypto_init_proc(); diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 69abada..a55e4e6 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -13,6 +13,7 @@ * any later version. */ +#include <crypto/aead.h> #include <crypto/scatterwalk.h> #include <crypto/if_alg.h> #include <linux/init.h> diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c index 8109aaa..150c2b6 100644 --- a/crypto/algif_rng.c +++ b/crypto/algif_rng.c @@ -164,7 +164,7 @@ static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen) * Check whether seedlen is of sufficient size is done in RNG * implementations. 
*/ - return crypto_rng_reset(private, (u8 *)seed, seedlen); + return crypto_rng_reset(private, seed, seedlen); } static const struct af_alg_type algif_type_rng = { diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c index 765fe76..eff337c 100644 --- a/crypto/ansi_cprng.c +++ b/crypto/ansi_cprng.c @@ -20,8 +20,6 @@ #include <linux/moduleparam.h> #include <linux/string.h> -#include "internal.h" - #define DEFAULT_PRNG_KEY "0123456789abcdef" #define DEFAULT_PRNG_KSZ 16 #define DEFAULT_BLK_SZ 16 @@ -281,11 +279,11 @@ static void free_prng_context(struct prng_context *ctx) } static int reset_prng_context(struct prng_context *ctx, - unsigned char *key, size_t klen, - unsigned char *V, unsigned char *DT) + const unsigned char *key, size_t klen, + const unsigned char *V, const unsigned char *DT) { int ret; - unsigned char *prng_key; + const unsigned char *prng_key; spin_lock_bh(&ctx->prng_lock); ctx->flags |= PRNG_NEED_RESET; @@ -353,8 +351,9 @@ static void cprng_exit(struct crypto_tfm *tfm) free_prng_context(crypto_tfm_ctx(tfm)); } -static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen) +static int cprng_get_random(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); @@ -367,11 +366,12 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata, * V and KEY are required during reset, and DT is optional, detected * as being present by testing the length of the seed */ -static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +static int cprng_reset(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) { struct prng_context *prng = crypto_rng_ctx(tfm); - u8 *key = seed + DEFAULT_BLK_SZ; - u8 *dt = NULL; + const u8 *key = seed + DEFAULT_BLK_SZ; + const u8 *dt = NULL; if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ) return -EINVAL; @@ -387,18 +387,20 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) } 
#ifdef CONFIG_CRYPTO_FIPS -static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen) +static int fips_cprng_get_random(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); return get_prng_bytes(rdata, dlen, prng, 1); } -static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +static int fips_cprng_reset(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) { u8 rdata[DEFAULT_BLK_SZ]; - u8 *key = seed + DEFAULT_BLK_SZ; + const u8 *key = seed + DEFAULT_BLK_SZ; int rc; struct prng_context *prng = crypto_rng_ctx(tfm); @@ -424,40 +426,32 @@ out: } #endif -static struct crypto_alg rng_algs[] = { { - .cra_name = "stdrng", - .cra_driver_name = "ansi_cprng", - .cra_priority = 100, - .cra_flags = CRYPTO_ALG_TYPE_RNG, - .cra_ctxsize = sizeof(struct prng_context), - .cra_type = &crypto_rng_type, - .cra_module = THIS_MODULE, - .cra_init = cprng_init, - .cra_exit = cprng_exit, - .cra_u = { - .rng = { - .rng_make_random = cprng_get_random, - .rng_reset = cprng_reset, - .seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ, - } +static struct rng_alg rng_algs[] = { { + .generate = cprng_get_random, + .seed = cprng_reset, + .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, + .base = { + .cra_name = "stdrng", + .cra_driver_name = "ansi_cprng", + .cra_priority = 100, + .cra_ctxsize = sizeof(struct prng_context), + .cra_module = THIS_MODULE, + .cra_init = cprng_init, + .cra_exit = cprng_exit, } #ifdef CONFIG_CRYPTO_FIPS }, { - .cra_name = "fips(ansi_cprng)", - .cra_driver_name = "fips_ansi_cprng", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_RNG, - .cra_ctxsize = sizeof(struct prng_context), - .cra_type = &crypto_rng_type, - .cra_module = THIS_MODULE, - .cra_init = cprng_init, - .cra_exit = cprng_exit, - .cra_u = { - .rng = { - .rng_make_random = fips_cprng_get_random, - .rng_reset = fips_cprng_reset, - .seedsize = 
DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ, - } + .generate = fips_cprng_get_random, + .seed = fips_cprng_reset, + .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, + .base = { + .cra_name = "fips(ansi_cprng)", + .cra_driver_name = "fips_ansi_cprng", + .cra_priority = 300, + .cra_ctxsize = sizeof(struct prng_context), + .cra_module = THIS_MODULE, + .cra_init = cprng_init, + .cra_exit = cprng_exit, } #endif } }; @@ -465,12 +459,12 @@ static struct crypto_alg rng_algs[] = { { /* Module initalization */ static int __init prng_mod_init(void) { - return crypto_register_algs(rng_algs, ARRAY_SIZE(rng_algs)); + return crypto_register_rngs(rng_algs, ARRAY_SIZE(rng_algs)); } static void __exit prng_mod_fini(void) { - crypto_unregister_algs(rng_algs, ARRAY_SIZE(rng_algs)); + crypto_unregister_rngs(rng_algs, ARRAY_SIZE(rng_algs)); } MODULE_LICENSE("GPL"); diff --git a/crypto/authenc.c b/crypto/authenc.c index 78fb16c..3e85229 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -10,7 +10,7 @@ * */ -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/authenc.h> @@ -570,13 +570,14 @@ static int crypto_authenc_init_tfm(struct crypto_tfm *tfm) crypto_ahash_alignmask(auth) + 1) + crypto_ablkcipher_ivsize(enc); - tfm->crt_aead.reqsize = sizeof(struct authenc_request_ctx) + - ctx->reqoff + - max_t(unsigned int, - crypto_ahash_reqsize(auth) + - sizeof(struct ahash_request), - sizeof(struct skcipher_givcrypt_request) + - crypto_ablkcipher_reqsize(enc)); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct authenc_request_ctx) + + ctx->reqoff + + max_t(unsigned int, + crypto_ahash_reqsize(auth) + + sizeof(struct ahash_request), + sizeof(struct skcipher_givcrypt_request) + + crypto_ablkcipher_reqsize(enc))); return 0; diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 024bff2..a3da677 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -12,7 +12,7 @@ * */ 
-#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/authenc.h> @@ -662,13 +662,14 @@ static int crypto_authenc_esn_init_tfm(struct crypto_tfm *tfm) crypto_ahash_alignmask(auth) + 1) + crypto_ablkcipher_ivsize(enc); - tfm->crt_aead.reqsize = sizeof(struct authenc_esn_request_ctx) + - ctx->reqoff + - max_t(unsigned int, - crypto_ahash_reqsize(auth) + - sizeof(struct ahash_request), - sizeof(struct skcipher_givcrypt_request) + - crypto_ablkcipher_reqsize(enc)); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct authenc_esn_request_ctx) + + ctx->reqoff + + max_t(unsigned int, + crypto_ahash_reqsize(auth) + + sizeof(struct ahash_request), + sizeof(struct skcipher_givcrypt_request) + + crypto_ablkcipher_reqsize(enc))); return 0; diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 0122bec..11b9814 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -14,6 +14,7 @@ * */ +#include <crypto/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/errno.h> diff --git a/crypto/ccm.c b/crypto/ccm.c index 003bbbd..a4d1a5e 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -453,9 +453,9 @@ static int crypto_ccm_init_tfm(struct crypto_tfm *tfm) align = crypto_tfm_alg_alignmask(tfm); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_aead.reqsize = align + - sizeof(struct crypto_ccm_req_priv_ctx) + - crypto_ablkcipher_reqsize(ctr); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + align + sizeof(struct crypto_ccm_req_priv_ctx) + + crypto_ablkcipher_reqsize(ctr)); return 0; @@ -729,10 +729,10 @@ static int crypto_rfc4309_init_tfm(struct crypto_tfm *tfm) align = crypto_aead_alignmask(aead); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_aead.reqsize = sizeof(struct aead_request) + - ALIGN(crypto_aead_reqsize(aead), - crypto_tfm_ctx_alignment()) + - align + 16; + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), 
+ sizeof(struct aead_request) + + ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) + + align + 16); return 0; } diff --git a/crypto/cryptd.c b/crypto/cryptd.c index b0602ba..4264c8d 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -295,6 +295,23 @@ static void cryptd_blkcipher_exit_tfm(struct crypto_tfm *tfm) crypto_free_blkcipher(ctx->child); } +static int cryptd_init_instance(struct crypto_instance *inst, + struct crypto_alg *alg) +{ + if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, + "cryptd(%s)", + alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); + + inst->alg.cra_priority = alg->cra_priority + 50; + inst->alg.cra_blocksize = alg->cra_blocksize; + inst->alg.cra_alignmask = alg->cra_alignmask; + + return 0; +} + static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, unsigned int tail) { @@ -308,17 +325,10 @@ static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head, inst = (void *)(p + head); - err = -ENAMETOOLONG; - if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, - "cryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) + err = cryptd_init_instance(inst, alg); + if (err) goto out_free_inst; - memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - - inst->alg.cra_priority = alg->cra_priority + 50; - inst->alg.cra_blocksize = alg->cra_blocksize; - inst->alg.cra_alignmask = alg->cra_alignmask; - out: return p; @@ -729,7 +739,8 @@ static int cryptd_aead_init_tfm(struct crypto_tfm *tfm) crypto_aead_set_flags(cipher, CRYPTO_TFM_REQ_MAY_SLEEP); ctx->child = cipher; - tfm->crt_aead.reqsize = sizeof(struct cryptd_aead_request_ctx); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct cryptd_aead_request_ctx)); return 0; } @@ -746,29 +757,34 @@ static int cryptd_create_aead(struct crypto_template *tmpl, struct aead_instance_ctx *ctx; struct crypto_instance *inst; struct 
crypto_alg *alg; - u32 type = CRYPTO_ALG_TYPE_AEAD; - u32 mask = CRYPTO_ALG_TYPE_MASK; + const char *name; + u32 type = 0; + u32 mask = 0; int err; cryptd_check_internal(tb, &type, &mask); - alg = crypto_get_attr_alg(tb, type, mask); - if (IS_ERR(alg)) - return PTR_ERR(alg); + name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(name)) + return PTR_ERR(name); - inst = cryptd_alloc_instance(alg, 0, sizeof(*ctx)); - err = PTR_ERR(inst); - if (IS_ERR(inst)) - goto out_put_alg; + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return -ENOMEM; ctx = crypto_instance_ctx(inst); ctx->queue = queue; - err = crypto_init_spawn(&ctx->aead_spawn.base, alg, inst, - CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC); + crypto_set_aead_spawn(&ctx->aead_spawn, inst); + err = crypto_grab_aead(&ctx->aead_spawn, name, type, mask); if (err) goto out_free_inst; + alg = crypto_aead_spawn_alg(&ctx->aead_spawn); + err = cryptd_init_instance(inst, alg); + if (err) + goto out_drop_aead; + type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; if (alg->cra_flags & CRYPTO_ALG_INTERNAL) type |= CRYPTO_ALG_INTERNAL; @@ -789,12 +805,11 @@ static int cryptd_create_aead(struct crypto_template *tmpl, err = crypto_register_instance(tmpl, inst); if (err) { - crypto_drop_spawn(&ctx->aead_spawn.base); +out_drop_aead: + crypto_drop_aead(&ctx->aead_spawn); out_free_inst: kfree(inst); } -out_put_alg: - crypto_mod_put(alg); return err; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index a203191..941c9a4 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -25,6 +25,10 @@ #include <linux/mm.h> #include <linux/string.h> +static DEFINE_MUTEX(crypto_default_null_skcipher_lock); +static struct crypto_blkcipher *crypto_default_null_skcipher; +static int crypto_default_null_skcipher_refcnt; + static int null_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { @@ -149,6 +153,41 @@ MODULE_ALIAS_CRYPTO("compress_null"); 
MODULE_ALIAS_CRYPTO("digest_null"); MODULE_ALIAS_CRYPTO("cipher_null"); +struct crypto_blkcipher *crypto_get_default_null_skcipher(void) +{ + struct crypto_blkcipher *tfm; + + mutex_lock(&crypto_default_null_skcipher_lock); + tfm = crypto_default_null_skcipher; + + if (!tfm) { + tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0); + if (IS_ERR(tfm)) + goto unlock; + + crypto_default_null_skcipher = tfm; + } + + crypto_default_null_skcipher_refcnt++; + +unlock: + mutex_unlock(&crypto_default_null_skcipher_lock); + + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher); + +void crypto_put_default_null_skcipher(void) +{ + mutex_lock(&crypto_default_null_skcipher_lock); + if (!--crypto_default_null_skcipher_refcnt) { + crypto_free_blkcipher(crypto_default_null_skcipher); + crypto_default_null_skcipher = NULL; + } + mutex_unlock(&crypto_default_null_skcipher_lock); +} +EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher); + static int __init crypto_null_mod_init(void) { int ret = 0; diff --git a/crypto/drbg.c b/crypto/drbg.c index b69409c..9284348 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -235,7 +235,7 @@ static bool drbg_fips_continuous_test(struct drbg_state *drbg, #ifdef CONFIG_CRYPTO_FIPS int ret = 0; /* skip test if we test the overall system */ - if (drbg->test_data) + if (list_empty(&drbg->test_data.list)) return true; /* only perform test in FIPS mode */ if (0 == fips_enabled) @@ -487,7 +487,7 @@ static int drbg_ctr_df(struct drbg_state *drbg, out: memset(iv, 0, drbg_blocklen(drbg)); - memset(temp, 0, drbg_statelen(drbg)); + memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg)); memset(pad, 0, drbg_blocklen(drbg)); return ret; } @@ -1041,6 +1041,43 @@ static struct drbg_state_ops drbg_hash_ops = { * Functions common for DRBG implementations ******************************************************************/ +static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed, + int reseed) +{ + int ret = 
drbg->d_ops->update(drbg, seed, reseed); + + if (ret) + return ret; + + drbg->seeded = true; + /* 10.1.1.2 / 10.1.1.3 step 5 */ + drbg->reseed_ctr = 1; + + return ret; +} + +static void drbg_async_seed(struct work_struct *work) +{ + struct drbg_string data; + LIST_HEAD(seedlist); + struct drbg_state *drbg = container_of(work, struct drbg_state, + seed_work); + int ret; + + get_blocking_random_bytes(drbg->seed_buf, drbg->seed_buf_len); + + drbg_string_fill(&data, drbg->seed_buf, drbg->seed_buf_len); + list_add_tail(&data.list, &seedlist); + mutex_lock(&drbg->drbg_mutex); + ret = __drbg_seed(drbg, &seedlist, true); + if (!ret && drbg->jent) { + crypto_free_rng(drbg->jent); + drbg->jent = NULL; + } + memzero_explicit(drbg->seed_buf, drbg->seed_buf_len); + mutex_unlock(&drbg->drbg_mutex); +} + /* * Seeding or reseeding of the DRBG * @@ -1056,8 +1093,6 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, bool reseed) { int ret = 0; - unsigned char *entropy = NULL; - size_t entropylen = 0; struct drbg_string data1; LIST_HEAD(seedlist); @@ -1068,31 +1103,29 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, return -EINVAL; } - if (drbg->test_data && drbg->test_data->testentropy) { - drbg_string_fill(&data1, drbg->test_data->testentropy->buf, - drbg->test_data->testentropy->len); + if (list_empty(&drbg->test_data.list)) { + drbg_string_fill(&data1, drbg->test_data.buf, + drbg->test_data.len); pr_devel("DRBG: using test entropy\n"); } else { - /* - * Gather entropy equal to the security strength of the DRBG. - * With a derivation function, a nonce is required in addition - * to the entropy. A nonce must be at least 1/2 of the security - * strength of the DRBG in size. Thus, entropy * nonce is 3/2 - * of the strength. The consideration of a nonce is only - * applicable during initial seeding. 
- */ - entropylen = drbg_sec_strength(drbg->core->flags); - if (!entropylen) - return -EFAULT; - if (!reseed) - entropylen = ((entropylen + 1) / 2) * 3; - pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n", - entropylen); - entropy = kzalloc(entropylen, GFP_KERNEL); - if (!entropy) - return -ENOMEM; - get_random_bytes(entropy, entropylen); - drbg_string_fill(&data1, entropy, entropylen); + /* Get seed from in-kernel /dev/urandom */ + get_random_bytes(drbg->seed_buf, drbg->seed_buf_len); + + /* Get seed from Jitter RNG */ + if (!drbg->jent || + crypto_rng_get_bytes(drbg->jent, + drbg->seed_buf + drbg->seed_buf_len, + drbg->seed_buf_len)) { + drbg_string_fill(&data1, drbg->seed_buf, + drbg->seed_buf_len); + pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n", + drbg->seed_buf_len); + } else { + drbg_string_fill(&data1, drbg->seed_buf, + drbg->seed_buf_len * 2); + pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n", + drbg->seed_buf_len * 2); + } } list_add_tail(&data1.list, &seedlist); @@ -1111,16 +1144,28 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers, memset(drbg->C, 0, drbg_statelen(drbg)); } - ret = drbg->d_ops->update(drbg, &seedlist, reseed); + ret = __drbg_seed(drbg, &seedlist, reseed); + + /* + * Clear the initial entropy buffer as the async call may not overwrite + * that buffer for quite some time. + */ + memzero_explicit(drbg->seed_buf, drbg->seed_buf_len * 2); if (ret) goto out; + /* + * For all subsequent seeding calls, we only need the seed buffer + * equal to the security strength of the DRBG. We undo the calculation + * in drbg_alloc_state. + */ + if (!reseed) + drbg->seed_buf_len = drbg->seed_buf_len / 3 * 2; - drbg->seeded = true; - /* 10.1.1.2 / 10.1.1.3 step 5 */ - drbg->reseed_ctr = 1; + /* Invoke asynchronous seeding unless DRBG is in test mode. 
*/ + if (!list_empty(&drbg->test_data.list) && !reseed) + schedule_work(&drbg->seed_work); out: - kzfree(entropy); return ret; } @@ -1136,11 +1181,19 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) kzfree(drbg->scratchpad); drbg->scratchpad = NULL; drbg->reseed_ctr = 0; + drbg->d_ops = NULL; + drbg->core = NULL; #ifdef CONFIG_CRYPTO_FIPS kzfree(drbg->prev); drbg->prev = NULL; drbg->fips_primed = false; #endif + kzfree(drbg->seed_buf); + drbg->seed_buf = NULL; + if (drbg->jent) { + crypto_free_rng(drbg->jent); + drbg->jent = NULL; + } } /* @@ -1152,6 +1205,27 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) int ret = -ENOMEM; unsigned int sb_size = 0; + switch (drbg->core->flags & DRBG_TYPE_MASK) { +#ifdef CONFIG_CRYPTO_DRBG_HMAC + case DRBG_HMAC: + drbg->d_ops = &drbg_hmac_ops; + break; +#endif /* CONFIG_CRYPTO_DRBG_HMAC */ +#ifdef CONFIG_CRYPTO_DRBG_HASH + case DRBG_HASH: + drbg->d_ops = &drbg_hash_ops; + break; +#endif /* CONFIG_CRYPTO_DRBG_HASH */ +#ifdef CONFIG_CRYPTO_DRBG_CTR + case DRBG_CTR: + drbg->d_ops = &drbg_ctr_ops; + break; +#endif /* CONFIG_CRYPTO_DRBG_CTR */ + default: + ret = -EOPNOTSUPP; + goto err; + } + drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL); if (!drbg->V) goto err; @@ -1181,87 +1255,50 @@ static inline int drbg_alloc_state(struct drbg_state *drbg) if (!drbg->scratchpad) goto err; } - spin_lock_init(&drbg->drbg_lock); - return 0; - -err: - drbg_dealloc_state(drbg); - return ret; -} -/* - * Strategy to avoid holding long term locks: generate a shadow copy of DRBG - * and perform all operations on this shadow copy. After finishing, restore - * the updated state of the shadow copy into original drbg state. 
This way, - * only the read and write operations of the original drbg state must be - * locked - */ -static inline void drbg_copy_drbg(struct drbg_state *src, - struct drbg_state *dst) -{ - if (!src || !dst) - return; - memcpy(dst->V, src->V, drbg_statelen(src)); - memcpy(dst->C, src->C, drbg_statelen(src)); - dst->reseed_ctr = src->reseed_ctr; - dst->seeded = src->seeded; - dst->pr = src->pr; -#ifdef CONFIG_CRYPTO_FIPS - dst->fips_primed = src->fips_primed; - memcpy(dst->prev, src->prev, drbg_blocklen(src)); -#endif /* - * Not copied: - * scratchpad is initialized drbg_alloc_state; - * priv_data is initialized with call to crypto_init; - * d_ops and core are set outside, as these parameters are const; - * test_data is set outside to prevent it being copied back. + * Gather entropy equal to the security strength of the DRBG. + * With a derivation function, a nonce is required in addition + * to the entropy. A nonce must be at least 1/2 of the security + * strength of the DRBG in size. Thus, entropy * nonce is 3/2 + * of the strength. The consideration of a nonce is only + * applicable during initial seeding. */ -} - -static int drbg_make_shadow(struct drbg_state *drbg, struct drbg_state **shadow) -{ - int ret = -ENOMEM; - struct drbg_state *tmp = NULL; - - tmp = kzalloc(sizeof(struct drbg_state), GFP_KERNEL); - if (!tmp) - return -ENOMEM; + drbg->seed_buf_len = drbg_sec_strength(drbg->core->flags); + if (!drbg->seed_buf_len) { + ret = -EFAULT; + goto err; + } + /* + * Ensure we have sufficient buffer space for initial seed which + * consists of the seed from get_random_bytes and the Jitter RNG. 
+ */ + drbg->seed_buf_len = ((drbg->seed_buf_len + 1) / 2) * 3; + drbg->seed_buf = kzalloc(drbg->seed_buf_len * 2, GFP_KERNEL); + if (!drbg->seed_buf) + goto err; - /* read-only data as they are defined as const, no lock needed */ - tmp->core = drbg->core; - tmp->d_ops = drbg->d_ops; + INIT_WORK(&drbg->seed_work, drbg_async_seed); - ret = drbg_alloc_state(tmp); - if (ret) - goto err; + drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0); + if(IS_ERR(drbg->jent)) + { + pr_info("DRBG: could not allocate Jitter RNG handle for seeding\n"); + /* + * As the Jitter RNG is a module that may not be present, we + * continue with the operation and do not fully tie the DRBG + * to the Jitter RNG. + */ + drbg->jent = NULL; + } - spin_lock_bh(&drbg->drbg_lock); - drbg_copy_drbg(drbg, tmp); - /* only make a link to the test buffer, as we only read that data */ - tmp->test_data = drbg->test_data; - spin_unlock_bh(&drbg->drbg_lock); - *shadow = tmp; return 0; err: - kzfree(tmp); + drbg_dealloc_state(drbg); return ret; } -static void drbg_restore_shadow(struct drbg_state *drbg, - struct drbg_state **shadow) -{ - struct drbg_state *tmp = *shadow; - - spin_lock_bh(&drbg->drbg_lock); - drbg_copy_drbg(tmp, drbg); - spin_unlock_bh(&drbg->drbg_lock); - drbg_dealloc_state(tmp); - kzfree(tmp); - *shadow = NULL; -} - /************************************************************************* * DRBG interface functions *************************************************************************/ @@ -1287,14 +1324,12 @@ static int drbg_generate(struct drbg_state *drbg, struct drbg_string *addtl) { int len = 0; - struct drbg_state *shadow = NULL; LIST_HEAD(addtllist); - struct drbg_string timestamp; - union { - cycles_t cycles; - unsigned char char_cycles[sizeof(cycles_t)]; - } now; + if (!drbg->core) { + pr_devel("DRBG: not yet seeded\n"); + return -EINVAL; + } if (0 == buflen || !buf) { pr_devel("DRBG: no output buffer provided\n"); return -EINVAL; @@ -1304,15 +1339,9 @@ static int 
drbg_generate(struct drbg_state *drbg, return -EINVAL; } - len = drbg_make_shadow(drbg, &shadow); - if (len) { - pr_devel("DRBG: shadow copy cannot be generated\n"); - return len; - } - /* 9.3.1 step 2 */ len = -EINVAL; - if (buflen > (drbg_max_request_bytes(shadow))) { + if (buflen > (drbg_max_request_bytes(drbg))) { pr_devel("DRBG: requested random numbers too large %u\n", buflen); goto err; @@ -1321,7 +1350,7 @@ static int drbg_generate(struct drbg_state *drbg, /* 9.3.1 step 3 is implicit with the chosen DRBG */ /* 9.3.1 step 4 */ - if (addtl && addtl->len > (drbg_max_addtl(shadow))) { + if (addtl && addtl->len > (drbg_max_addtl(drbg))) { pr_devel("DRBG: additional information string too long %zu\n", addtl->len); goto err; @@ -1332,46 +1361,29 @@ static int drbg_generate(struct drbg_state *drbg, * 9.3.1 step 6 and 9 supplemented by 9.3.2 step c is implemented * here. The spec is a bit convoluted here, we make it simpler. */ - if ((drbg_max_requests(shadow)) < shadow->reseed_ctr) - shadow->seeded = false; - - /* allocate cipher handle */ - len = shadow->d_ops->crypto_init(shadow); - if (len) - goto err; + if ((drbg_max_requests(drbg)) < drbg->reseed_ctr) + drbg->seeded = false; - if (shadow->pr || !shadow->seeded) { + if (drbg->pr || !drbg->seeded) { pr_devel("DRBG: reseeding before generation (prediction " "resistance: %s, state %s)\n", drbg->pr ? "true" : "false", drbg->seeded ? "seeded" : "unseeded"); /* 9.3.1 steps 7.1 through 7.3 */ - len = drbg_seed(shadow, addtl, true); + len = drbg_seed(drbg, addtl, true); if (len) goto err; /* 9.3.1 step 7.4 */ addtl = NULL; } - /* - * Mix the time stamp into the DRBG state if the DRBG is not in - * test mode. If there are two callers invoking the DRBG at the same - * time, i.e. before the first caller merges its shadow state back, - * both callers would obtain the same random number stream without - * changing the state here. 
- */ - if (!drbg->test_data) { - now.cycles = random_get_entropy(); - drbg_string_fill(×tamp, now.char_cycles, sizeof(cycles_t)); - list_add_tail(×tamp.list, &addtllist); - } if (addtl && 0 < addtl->len) list_add_tail(&addtl->list, &addtllist); /* 9.3.1 step 8 and 10 */ - len = shadow->d_ops->generate(shadow, buf, buflen, &addtllist); + len = drbg->d_ops->generate(drbg, buf, buflen, &addtllist); /* 10.1.1.4 step 6, 10.1.2.5 step 7, 10.2.1.5.2 step 7 */ - shadow->reseed_ctr++; + drbg->reseed_ctr++; if (0 >= len) goto err; @@ -1391,7 +1403,7 @@ static int drbg_generate(struct drbg_state *drbg, * case somebody has a need to implement the test of 11.3.3. */ #if 0 - if (shadow->reseed_ctr && !(shadow->reseed_ctr % 4096)) { + if (drbg->reseed_ctr && !(drbg->reseed_ctr % 4096)) { int err = 0; pr_devel("DRBG: start to perform self test\n"); if (drbg->core->flags & DRBG_HMAC) @@ -1410,8 +1422,6 @@ static int drbg_generate(struct drbg_state *drbg, * are returned when reusing this DRBG cipher handle */ drbg_uninstantiate(drbg); - drbg_dealloc_state(shadow); - kzfree(shadow); return 0; } else { pr_devel("DRBG: self test successful\n"); @@ -1425,8 +1435,6 @@ static int drbg_generate(struct drbg_state *drbg, */ len = 0; err: - shadow->d_ops->crypto_fini(shadow); - drbg_restore_shadow(drbg, &shadow); return len; } @@ -1442,19 +1450,21 @@ static int drbg_generate_long(struct drbg_state *drbg, unsigned char *buf, unsigned int buflen, struct drbg_string *addtl) { - int len = 0; + unsigned int len = 0; unsigned int slice = 0; do { - int tmplen = 0; + int err = 0; unsigned int chunk = 0; slice = ((buflen - len) / drbg_max_request_bytes(drbg)); chunk = slice ? 
drbg_max_request_bytes(drbg) : (buflen - len); - tmplen = drbg_generate(drbg, buf + len, chunk, addtl); - if (0 >= tmplen) - return tmplen; - len += tmplen; + mutex_lock(&drbg->drbg_mutex); + err = drbg_generate(drbg, buf + len, chunk, addtl); + mutex_unlock(&drbg->drbg_mutex); + if (0 > err) + return err; + len += chunk; } while (slice > 0 && (len < buflen)); - return len; + return 0; } /* @@ -1477,32 +1487,12 @@ static int drbg_generate_long(struct drbg_state *drbg, static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, int coreref, bool pr) { - int ret = -ENOMEM; + int ret; + bool reseed = true; pr_devel("DRBG: Initializing DRBG core %d with prediction resistance " "%s\n", coreref, pr ? "enabled" : "disabled"); - drbg->core = &drbg_cores[coreref]; - drbg->pr = pr; - drbg->seeded = false; - switch (drbg->core->flags & DRBG_TYPE_MASK) { -#ifdef CONFIG_CRYPTO_DRBG_HMAC - case DRBG_HMAC: - drbg->d_ops = &drbg_hmac_ops; - break; -#endif /* CONFIG_CRYPTO_DRBG_HMAC */ -#ifdef CONFIG_CRYPTO_DRBG_HASH - case DRBG_HASH: - drbg->d_ops = &drbg_hash_ops; - break; -#endif /* CONFIG_CRYPTO_DRBG_HASH */ -#ifdef CONFIG_CRYPTO_DRBG_CTR - case DRBG_CTR: - drbg->d_ops = &drbg_ctr_ops; - break; -#endif /* CONFIG_CRYPTO_DRBG_CTR */ - default: - return -EOPNOTSUPP; - } + mutex_lock(&drbg->drbg_mutex); /* 9.1 step 1 is implicit with the selected DRBG type */ @@ -1514,22 +1504,36 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers, /* 9.1 step 4 is implicit in drbg_sec_strength */ - ret = drbg_alloc_state(drbg); - if (ret) - return ret; + if (!drbg->core) { + drbg->core = &drbg_cores[coreref]; + drbg->pr = pr; + drbg->seeded = false; - ret = -EFAULT; - if (drbg->d_ops->crypto_init(drbg)) - goto err; - ret = drbg_seed(drbg, pers, false); - drbg->d_ops->crypto_fini(drbg); - if (ret) + ret = drbg_alloc_state(drbg); + if (ret) + goto unlock; + + ret = -EFAULT; + if (drbg->d_ops->crypto_init(drbg)) + goto err; + + reseed = false; + } + + 
ret = drbg_seed(drbg, pers, reseed); + + if (ret && !reseed) { + drbg->d_ops->crypto_fini(drbg); goto err; + } - return 0; + mutex_unlock(&drbg->drbg_mutex); + return ret; err: drbg_dealloc_state(drbg); +unlock: + mutex_unlock(&drbg->drbg_mutex); return ret; } @@ -1544,10 +1548,11 @@ err: */ static int drbg_uninstantiate(struct drbg_state *drbg) { - spin_lock_bh(&drbg->drbg_lock); + cancel_work_sync(&drbg->seed_work); + if (drbg->d_ops) + drbg->d_ops->crypto_fini(drbg); drbg_dealloc_state(drbg); /* no scrubbing of test_data -- this shall survive an uninstantiate */ - spin_unlock_bh(&drbg->drbg_lock); return 0; } @@ -1555,16 +1560,17 @@ static int drbg_uninstantiate(struct drbg_state *drbg) * Helper function for setting the test data in the DRBG * * @drbg DRBG state handle - * @test_data test data to sets + * @data test data + * @len test data length */ -static inline void drbg_set_testdata(struct drbg_state *drbg, - struct drbg_test_data *test_data) +static void drbg_kcapi_set_entropy(struct crypto_rng *tfm, + const u8 *data, unsigned int len) { - if (!test_data || !test_data->testentropy) - return; - spin_lock_bh(&drbg->drbg_lock); - drbg->test_data = test_data; - spin_unlock_bh(&drbg->drbg_lock); + struct drbg_state *drbg = crypto_rng_ctx(tfm); + + mutex_lock(&drbg->drbg_mutex); + drbg_string_fill(&drbg->test_data, data, len); + mutex_unlock(&drbg->drbg_mutex); } /*************************************************************** @@ -1714,15 +1720,10 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name, static int drbg_kcapi_init(struct crypto_tfm *tfm) { struct drbg_state *drbg = crypto_tfm_ctx(tfm); - bool pr = false; - int coreref = 0; - drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm), &coreref, &pr); - /* - * when personalization string is needed, the caller must call reset - * and provide the personalization string as seed information - */ - return drbg_instantiate(drbg, NULL, coreref, pr); + mutex_init(&drbg->drbg_mutex); + + return 0; 
} static void drbg_kcapi_cleanup(struct crypto_tfm *tfm) @@ -1734,65 +1735,49 @@ static void drbg_kcapi_cleanup(struct crypto_tfm *tfm) * Generate random numbers invoked by the kernel crypto API: * The API of the kernel crypto API is extended as follows: * - * If dlen is larger than zero, rdata is interpreted as the output buffer - * where random data is to be stored. - * - * If dlen is zero, rdata is interpreted as a pointer to a struct drbg_gen - * which holds the additional information string that is used for the - * DRBG generation process. The output buffer that is to be used to store - * data is also pointed to by struct drbg_gen. + * src is additional input supplied to the RNG. + * slen is the length of src. + * dst is the output buffer where random data is to be stored. + * dlen is the length of dst. */ -static int drbg_kcapi_random(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen) +static int drbg_kcapi_random(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen) { struct drbg_state *drbg = crypto_rng_ctx(tfm); - if (0 < dlen) { - return drbg_generate_long(drbg, rdata, dlen, NULL); - } else { - struct drbg_gen *data = (struct drbg_gen *)rdata; - struct drbg_string addtl; - /* catch NULL pointer */ - if (!data) - return 0; - drbg_set_testdata(drbg, data->test_data); + struct drbg_string *addtl = NULL; + struct drbg_string string; + + if (slen) { /* linked list variable is now local to allow modification */ - drbg_string_fill(&addtl, data->addtl->buf, data->addtl->len); - return drbg_generate_long(drbg, data->outbuf, data->outlen, - &addtl); + drbg_string_fill(&string, src, slen); + addtl = &string; } + + return drbg_generate_long(drbg, dst, dlen, addtl); } /* - * Reset the DRBG invoked by the kernel crypto API - * The reset implies a full re-initialization of the DRBG. 
Similar to the - * generate function of drbg_kcapi_random, this function extends the - * kernel crypto API interface with struct drbg_gen + * Seed the DRBG invoked by the kernel crypto API */ -static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +static int drbg_kcapi_seed(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) { struct drbg_state *drbg = crypto_rng_ctx(tfm); struct crypto_tfm *tfm_base = crypto_rng_tfm(tfm); bool pr = false; - struct drbg_string seed_string; + struct drbg_string string; + struct drbg_string *seed_string = NULL; int coreref = 0; - drbg_uninstantiate(drbg); drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm_base), &coreref, &pr); if (0 < slen) { - drbg_string_fill(&seed_string, seed, slen); - return drbg_instantiate(drbg, &seed_string, coreref, pr); - } else { - struct drbg_gen *data = (struct drbg_gen *)seed; - /* allow invocation of API call with NULL, 0 */ - if (!data) - return drbg_instantiate(drbg, NULL, coreref, pr); - drbg_set_testdata(drbg, data->test_data); - /* linked list variable is now local to allow modification */ - drbg_string_fill(&seed_string, data->addtl->buf, - data->addtl->len); - return drbg_instantiate(drbg, &seed_string, coreref, pr); + drbg_string_fill(&string, seed, slen); + seed_string = &string; } + + return drbg_instantiate(drbg, seed_string, coreref, pr); } /*************************************************************** @@ -1811,7 +1796,6 @@ static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) */ static inline int __init drbg_healthcheck_sanity(void) { -#ifdef CONFIG_CRYPTO_FIPS int len = 0; #define OUTBUFLEN 16 unsigned char buf[OUTBUFLEN]; @@ -1839,6 +1823,8 @@ static inline int __init drbg_healthcheck_sanity(void) if (!drbg) return -ENOMEM; + mutex_init(&drbg->drbg_mutex); + /* * if the following tests fail, it is likely that there is a buffer * overflow as buf is much smaller than the requested or provided @@ -1877,37 +1863,33 @@ static 
inline int __init drbg_healthcheck_sanity(void) outbuf: kzfree(drbg); return rc; -#else /* CONFIG_CRYPTO_FIPS */ - return 0; -#endif /* CONFIG_CRYPTO_FIPS */ } -static struct crypto_alg drbg_algs[22]; +static struct rng_alg drbg_algs[22]; /* * Fill the array drbg_algs used to register the different DRBGs * with the kernel crypto API. To fill the array, the information * from drbg_cores[] is used. */ -static inline void __init drbg_fill_array(struct crypto_alg *alg, +static inline void __init drbg_fill_array(struct rng_alg *alg, const struct drbg_core *core, int pr) { int pos = 0; static int priority = 100; - memset(alg, 0, sizeof(struct crypto_alg)); - memcpy(alg->cra_name, "stdrng", 6); + memcpy(alg->base.cra_name, "stdrng", 6); if (pr) { - memcpy(alg->cra_driver_name, "drbg_pr_", 8); + memcpy(alg->base.cra_driver_name, "drbg_pr_", 8); pos = 8; } else { - memcpy(alg->cra_driver_name, "drbg_nopr_", 10); + memcpy(alg->base.cra_driver_name, "drbg_nopr_", 10); pos = 10; } - memcpy(alg->cra_driver_name + pos, core->cra_name, + memcpy(alg->base.cra_driver_name + pos, core->cra_name, strlen(core->cra_name)); - alg->cra_priority = priority; + alg->base.cra_priority = priority; priority++; /* * If FIPS mode enabled, the selected DRBG shall have the @@ -1915,17 +1897,16 @@ static inline void __init drbg_fill_array(struct crypto_alg *alg, * it is selected. 
*/ if (fips_enabled) - alg->cra_priority += 200; - - alg->cra_flags = CRYPTO_ALG_TYPE_RNG; - alg->cra_ctxsize = sizeof(struct drbg_state); - alg->cra_type = &crypto_rng_type; - alg->cra_module = THIS_MODULE; - alg->cra_init = drbg_kcapi_init; - alg->cra_exit = drbg_kcapi_cleanup; - alg->cra_u.rng.rng_make_random = drbg_kcapi_random; - alg->cra_u.rng.rng_reset = drbg_kcapi_reset; - alg->cra_u.rng.seedsize = 0; + alg->base.cra_priority += 200; + + alg->base.cra_ctxsize = sizeof(struct drbg_state); + alg->base.cra_module = THIS_MODULE; + alg->base.cra_init = drbg_kcapi_init; + alg->base.cra_exit = drbg_kcapi_cleanup; + alg->generate = drbg_kcapi_random; + alg->seed = drbg_kcapi_seed; + alg->set_ent = drbg_kcapi_set_entropy; + alg->seedsize = 0; } static int __init drbg_init(void) @@ -1958,12 +1939,12 @@ static int __init drbg_init(void) drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 1); for (j = 0; ARRAY_SIZE(drbg_cores) > j; j++, i++) drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 0); - return crypto_register_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); + return crypto_register_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); } static void __exit drbg_exit(void) { - crypto_unregister_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); + crypto_unregister_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2)); } module_init(drbg_init); diff --git a/crypto/echainiv.c b/crypto/echainiv.c new file mode 100644 index 0000000..bd85dcc --- /dev/null +++ b/crypto/echainiv.c @@ -0,0 +1,546 @@ +/* + * echainiv: Encrypted Chain IV Generator + * + * This generator generates an IV based on a sequence number by xoring it + * with a salt and then encrypting it with the same key as used to encrypt + * the plain text. This algorithm requires that the block size be equal + * to the IV size. It is mainly useful for CBC. + * + * This generator can only be used by algorithms where authentication + * is performed after encryption (i.e., authenc). 
+ * + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/aead.h> +#include <crypto/null.h> +#include <crypto/rng.h> +#include <crypto/scatterwalk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> +#include <linux/string.h> + +#define MAX_IV_SIZE 16 + +struct echainiv_request_ctx { + struct scatterlist src[2]; + struct scatterlist dst[2]; + struct scatterlist ivbuf[2]; + struct scatterlist *ivsg; + struct aead_givcrypt_request subreq; +}; + +struct echainiv_ctx { + struct crypto_aead *child; + spinlock_t lock; + struct crypto_blkcipher *null; + u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); +}; + +static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv); + +static int echainiv_setkey(struct crypto_aead *tfm, + const u8 *key, unsigned int keylen) +{ + struct echainiv_ctx *ctx = crypto_aead_ctx(tfm); + + return crypto_aead_setkey(ctx->child, key, keylen); +} + +static int echainiv_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct echainiv_ctx *ctx = crypto_aead_ctx(tfm); + + return crypto_aead_setauthsize(ctx->child, authsize); +} + +/* We don't care if we get preempted and read/write IVs from the next CPU. 
*/ +static void echainiv_read_iv(u8 *dst, unsigned size) +{ + u32 *a = (u32 *)dst; + u32 __percpu *b = echainiv_iv; + + for (; size >= 4; size -= 4) { + *a++ = this_cpu_read(*b); + b++; + } +} + +static void echainiv_write_iv(const u8 *src, unsigned size) +{ + const u32 *a = (const u32 *)src; + u32 __percpu *b = echainiv_iv; + + for (; size >= 4; size -= 4) { + this_cpu_write(*b, *a); + a++; + b++; + } +} + +static void echainiv_encrypt_compat_complete2(struct aead_request *req, + int err) +{ + struct echainiv_request_ctx *rctx = aead_request_ctx(req); + struct aead_givcrypt_request *subreq = &rctx->subreq; + struct crypto_aead *geniv; + + if (err == -EINPROGRESS) + return; + + if (err) + goto out; + + geniv = crypto_aead_reqtfm(req); + scatterwalk_map_and_copy(subreq->giv, rctx->ivsg, 0, + crypto_aead_ivsize(geniv), 1); + +out: + kzfree(subreq->giv); +} + +static void echainiv_encrypt_compat_complete( + struct crypto_async_request *base, int err) +{ + struct aead_request *req = base->data; + + echainiv_encrypt_compat_complete2(req, err); + aead_request_complete(req, err); +} + +static void echainiv_encrypt_complete2(struct aead_request *req, int err) +{ + struct aead_request *subreq = aead_request_ctx(req); + struct crypto_aead *geniv; + unsigned int ivsize; + + if (err == -EINPROGRESS) + return; + + if (err) + goto out; + + geniv = crypto_aead_reqtfm(req); + ivsize = crypto_aead_ivsize(geniv); + + echainiv_write_iv(subreq->iv, ivsize); + + if (req->iv != subreq->iv) + memcpy(req->iv, subreq->iv, ivsize); + +out: + if (req->iv != subreq->iv) + kzfree(subreq->iv); +} + +static void echainiv_encrypt_complete(struct crypto_async_request *base, + int err) +{ + struct aead_request *req = base->data; + + echainiv_encrypt_complete2(req, err); + aead_request_complete(req, err); +} + +static int echainiv_encrypt_compat(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + struct 
echainiv_request_ctx *rctx = aead_request_ctx(req); + struct aead_givcrypt_request *subreq = &rctx->subreq; + unsigned int ivsize = crypto_aead_ivsize(geniv); + crypto_completion_t compl; + void *data; + u8 *info; + __be64 seq; + int err; + + if (req->cryptlen < ivsize) + return -EINVAL; + + compl = req->base.complete; + data = req->base.data; + + rctx->ivsg = scatterwalk_ffwd(rctx->ivbuf, req->dst, req->assoclen); + info = PageHighMem(sg_page(rctx->ivsg)) ? NULL : sg_virt(rctx->ivsg); + + if (!info) { + info = kmalloc(ivsize, req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: + GFP_ATOMIC); + if (!info) + return -ENOMEM; + + compl = echainiv_encrypt_compat_complete; + data = req; + } + + memcpy(&seq, req->iv + ivsize - sizeof(seq), sizeof(seq)); + + aead_givcrypt_set_tfm(subreq, ctx->child); + aead_givcrypt_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); + aead_givcrypt_set_crypt(subreq, + scatterwalk_ffwd(rctx->src, req->src, + req->assoclen + ivsize), + scatterwalk_ffwd(rctx->dst, rctx->ivsg, + ivsize), + req->cryptlen - ivsize, req->iv); + aead_givcrypt_set_assoc(subreq, req->src, req->assoclen); + aead_givcrypt_set_giv(subreq, info, be64_to_cpu(seq)); + + err = crypto_aead_givencrypt(subreq); + if (unlikely(PageHighMem(sg_page(rctx->ivsg)))) + echainiv_encrypt_compat_complete2(req, err); + return err; +} + +static int echainiv_encrypt(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + struct aead_request *subreq = aead_request_ctx(req); + crypto_completion_t compl; + void *data; + u8 *info; + unsigned int ivsize = crypto_aead_ivsize(geniv); + int err; + + if (req->cryptlen < ivsize) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = echainiv_encrypt_complete; + data = req; + info = req->iv; + + if (req->src != req->dst) { + struct scatterlist src[2]; + struct scatterlist dst[2]; + struct blkcipher_desc desc = { + 
.tfm = ctx->null, + }; + + err = crypto_blkcipher_encrypt( + &desc, + scatterwalk_ffwd(dst, req->dst, + req->assoclen + ivsize), + scatterwalk_ffwd(src, req->src, + req->assoclen + ivsize), + req->cryptlen - ivsize); + if (err) + return err; + } + + if (unlikely(!IS_ALIGNED((unsigned long)info, + crypto_aead_alignmask(geniv) + 1))) { + info = kmalloc(ivsize, req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: + GFP_ATOMIC); + if (!info) + return -ENOMEM; + + memcpy(info, req->iv, ivsize); + } + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, req->dst, req->dst, + req->cryptlen - ivsize, info); + aead_request_set_ad(subreq, req->assoclen + ivsize); + + crypto_xor(info, ctx->salt, ivsize); + scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); + echainiv_read_iv(info, ivsize); + + err = crypto_aead_encrypt(subreq); + echainiv_encrypt_complete2(req, err); + return err; +} + +static int echainiv_decrypt_compat(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + struct echainiv_request_ctx *rctx = aead_request_ctx(req); + struct aead_request *subreq = &rctx->subreq.areq; + crypto_completion_t compl; + void *data; + unsigned int ivsize = crypto_aead_ivsize(geniv); + + if (req->cryptlen < ivsize + crypto_aead_authsize(geniv)) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = req->base.complete; + data = req->base.data; + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, + scatterwalk_ffwd(rctx->src, req->src, + req->assoclen + ivsize), + scatterwalk_ffwd(rctx->dst, req->dst, + req->assoclen + ivsize), + req->cryptlen - ivsize, req->iv); + aead_request_set_assoc(subreq, req->src, req->assoclen); + + scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0); + + return crypto_aead_decrypt(subreq); +} + +static int 
echainiv_decrypt(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + struct aead_request *subreq = aead_request_ctx(req); + crypto_completion_t compl; + void *data; + unsigned int ivsize = crypto_aead_ivsize(geniv); + + if (req->cryptlen < ivsize + crypto_aead_authsize(geniv)) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = req->base.complete; + data = req->base.data; + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen - ivsize, req->iv); + aead_request_set_ad(subreq, req->assoclen + ivsize); + + scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0); + if (req->src != req->dst) + scatterwalk_map_and_copy(req->iv, req->dst, + req->assoclen, ivsize, 1); + + return crypto_aead_decrypt(subreq); +} + +static int echainiv_encrypt_compat_first(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + int err = 0; + + spin_lock_bh(&ctx->lock); + if (geniv->encrypt != echainiv_encrypt_compat_first) + goto unlock; + + geniv->encrypt = echainiv_encrypt_compat; + err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, + crypto_aead_ivsize(geniv)); + +unlock: + spin_unlock_bh(&ctx->lock); + + if (err) + return err; + + return echainiv_encrypt_compat(req); +} + +static int echainiv_encrypt_first(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + int err = 0; + + spin_lock_bh(&ctx->lock); + if (geniv->encrypt != echainiv_encrypt_first) + goto unlock; + + geniv->encrypt = echainiv_encrypt; + err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, + crypto_aead_ivsize(geniv)); + +unlock: + spin_unlock_bh(&ctx->lock); + + if (err) + return err; + + return echainiv_encrypt(req); +} + +static int 
echainiv_compat_init(struct crypto_tfm *tfm) +{ + struct crypto_aead *geniv = __crypto_aead_cast(tfm); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + int err; + + spin_lock_init(&ctx->lock); + + crypto_aead_set_reqsize(geniv, sizeof(struct echainiv_request_ctx)); + + err = aead_geniv_init(tfm); + + ctx->child = geniv->child; + geniv->child = geniv; + + return err; +} + +static int echainiv_init(struct crypto_tfm *tfm) +{ + struct crypto_aead *geniv = __crypto_aead_cast(tfm); + struct echainiv_ctx *ctx = crypto_aead_ctx(geniv); + int err; + + spin_lock_init(&ctx->lock); + + crypto_aead_set_reqsize(geniv, sizeof(struct aead_request)); + + ctx->null = crypto_get_default_null_skcipher(); + err = PTR_ERR(ctx->null); + if (IS_ERR(ctx->null)) + goto out; + + err = aead_geniv_init(tfm); + if (err) + goto drop_null; + + ctx->child = geniv->child; + geniv->child = geniv; + +out: + return err; + +drop_null: + crypto_put_default_null_skcipher(); + goto out; +} + +static void echainiv_compat_exit(struct crypto_tfm *tfm) +{ + struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_aead(ctx->child); +} + +static void echainiv_exit(struct crypto_tfm *tfm) +{ + struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_aead(ctx->child); + crypto_put_default_null_skcipher(); +} + +static int echainiv_aead_create(struct crypto_template *tmpl, + struct rtattr **tb) +{ + struct aead_instance *inst; + struct crypto_aead_spawn *spawn; + struct aead_alg *alg; + int err; + + inst = aead_geniv_alloc(tmpl, tb, 0, 0); + + if (IS_ERR(inst)) + return PTR_ERR(inst); + + err = -EINVAL; + if (inst->alg.ivsize < sizeof(u64) || + inst->alg.ivsize & (sizeof(u32) - 1) || + inst->alg.ivsize > MAX_IV_SIZE) + goto free_inst; + + spawn = aead_instance_ctx(inst); + alg = crypto_spawn_aead_alg(spawn); + + inst->alg.setkey = echainiv_setkey; + inst->alg.setauthsize = echainiv_setauthsize; + inst->alg.encrypt = echainiv_encrypt_first; + inst->alg.decrypt = echainiv_decrypt; + + 
inst->alg.base.cra_init = echainiv_init; + inst->alg.base.cra_exit = echainiv_exit; + + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_ctxsize = sizeof(struct echainiv_ctx); + inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize; + + if (alg->base.cra_aead.encrypt) { + inst->alg.encrypt = echainiv_encrypt_compat_first; + inst->alg.decrypt = echainiv_decrypt_compat; + + inst->alg.base.cra_init = echainiv_compat_init; + inst->alg.base.cra_exit = echainiv_compat_exit; + } + + err = aead_register_instance(tmpl, inst); + if (err) + goto free_inst; + +out: + return err; + +free_inst: + aead_geniv_free(inst); + goto out; +} + +static int echainiv_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + int err; + + err = crypto_get_default_rng(); + if (err) + goto out; + + err = echainiv_aead_create(tmpl, tb); + if (err) + goto put_rng; + +out: + return err; + +put_rng: + crypto_put_default_rng(); + goto out; +} + +static void echainiv_free(struct crypto_instance *inst) +{ + aead_geniv_free(aead_instance(inst)); + crypto_put_default_rng(); +} + +static struct crypto_template echainiv_tmpl = { + .name = "echainiv", + .create = echainiv_create, + .free = echainiv_free, + .module = THIS_MODULE, +}; + +static int __init echainiv_module_init(void) +{ + return crypto_register_template(&echainiv_tmpl); +} + +static void __exit echainiv_module_exit(void) +{ + crypto_unregister_template(&echainiv_tmpl); +} + +module_init(echainiv_module_init); +module_exit(echainiv_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Encrypted Chain IV Generator"); +MODULE_ALIAS_CRYPTO("echainiv"); diff --git a/crypto/fips.c b/crypto/fips.c index 5539700..9d627c1 100644 --- a/crypto/fips.c +++ b/crypto/fips.c @@ -10,7 +10,12 @@ * */ -#include "internal.h" +#include <linux/export.h> +#include <linux/fips.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/sysctl.h> int fips_enabled; 
EXPORT_SYMBOL_GPL(fips_enabled); @@ -25,3 +30,49 @@ static int fips_enable(char *str) } __setup("fips=", fips_enable); + +static struct ctl_table crypto_sysctl_table[] = { + { + .procname = "fips_enabled", + .data = &fips_enabled, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec + }, + {} +}; + +static struct ctl_table crypto_dir_table[] = { + { + .procname = "crypto", + .mode = 0555, + .child = crypto_sysctl_table + }, + {} +}; + +static struct ctl_table_header *crypto_sysctls; + +static void crypto_proc_fips_init(void) +{ + crypto_sysctls = register_sysctl_table(crypto_dir_table); +} + +static void crypto_proc_fips_exit(void) +{ + unregister_sysctl_table(crypto_sysctls); +} + +static int __init fips_init(void) +{ + crypto_proc_fips_init(); + return 0; +} + +static void __exit fips_exit(void) +{ + crypto_proc_fips_exit(); +} + +module_init(fips_init); +module_exit(fips_exit); diff --git a/crypto/gcm.c b/crypto/gcm.c index 2e403f6..fc2b55e 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -12,6 +12,7 @@ #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/internal/hash.h> +#include <crypto/null.h> #include <crypto/scatterwalk.h> #include <crypto/hash.h> #include "internal.h" @@ -39,7 +40,6 @@ struct crypto_rfc4106_ctx { struct crypto_rfc4543_instance_ctx { struct crypto_aead_spawn aead; - struct crypto_skcipher_spawn null; }; struct crypto_rfc4543_ctx { @@ -672,12 +672,12 @@ static int crypto_gcm_init_tfm(struct crypto_tfm *tfm) align = crypto_tfm_alg_alignmask(tfm); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_aead.reqsize = align + - offsetof(struct crypto_gcm_req_priv_ctx, u) + + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + align + offsetof(struct crypto_gcm_req_priv_ctx, u) + max(sizeof(struct ablkcipher_request) + crypto_ablkcipher_reqsize(ctr), sizeof(struct ahash_request) + - crypto_ahash_reqsize(ghash)); + crypto_ahash_reqsize(ghash))); return 0; @@ -946,10 +946,10 @@ static int 
crypto_rfc4106_init_tfm(struct crypto_tfm *tfm) align = crypto_aead_alignmask(aead); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_aead.reqsize = sizeof(struct aead_request) + - ALIGN(crypto_aead_reqsize(aead), - crypto_tfm_ctx_alignment()) + - align + 16; + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_request) + + ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) + + align + 16); return 0; } @@ -1246,7 +1246,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) if (IS_ERR(aead)) return PTR_ERR(aead); - null = crypto_spawn_blkcipher(&ictx->null.base); + null = crypto_get_default_null_skcipher(); err = PTR_ERR(null); if (IS_ERR(null)) goto err_free_aead; @@ -1256,10 +1256,10 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm) align = crypto_aead_alignmask(aead); align &= ~(crypto_tfm_ctx_alignment() - 1); - tfm->crt_aead.reqsize = sizeof(struct crypto_rfc4543_req_ctx) + - ALIGN(crypto_aead_reqsize(aead), - crypto_tfm_ctx_alignment()) + - align + 16; + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct crypto_rfc4543_req_ctx) + + ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) + + align + 16); return 0; @@ -1273,7 +1273,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm) struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm); crypto_free_aead(ctx->child); - crypto_free_blkcipher(ctx->null); + crypto_put_default_null_skcipher(); } static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) @@ -1311,23 +1311,15 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) alg = crypto_aead_spawn_alg(spawn); - crypto_set_skcipher_spawn(&ctx->null, inst); - err = crypto_grab_skcipher(&ctx->null, "ecb(cipher_null)", 0, - CRYPTO_ALG_ASYNC); - if (err) - goto out_drop_alg; - - crypto_skcipher_spawn_alg(&ctx->null); - err = -EINVAL; /* We only support 16-byte blocks. 
*/ if (alg->cra_aead.ivsize != 16) - goto out_drop_ecbnull; + goto out_drop_alg; /* Not a stream cipher? */ if (alg->cra_blocksize != 1) - goto out_drop_ecbnull; + goto out_drop_alg; err = -ENAMETOOLONG; if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, @@ -1335,7 +1327,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4543(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_drop_ecbnull; + goto out_drop_alg; inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD; inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC; @@ -1362,8 +1354,6 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) out: return inst; -out_drop_ecbnull: - crypto_drop_skcipher(&ctx->null); out_drop_alg: crypto_drop_aead(spawn); out_free_inst: @@ -1377,7 +1367,6 @@ static void crypto_rfc4543_free(struct crypto_instance *inst) struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst); crypto_drop_aead(&ctx->aead); - crypto_drop_skcipher(&ctx->null); kfree(inst); } diff --git a/crypto/internal.h b/crypto/internal.h index bd39bfc..00e42a3 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -25,7 +25,6 @@ #include <linux/notifier.h> #include <linux/rwsem.h> #include <linux/slab.h> -#include <linux/fips.h> /* Crypto notification events. */ enum { @@ -103,6 +102,8 @@ int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); int crypto_probing_notify(unsigned long val, void *v); +unsigned int crypto_alg_extsize(struct crypto_alg *alg); + static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg) { atomic_inc(&alg->cra_refcnt); diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c new file mode 100644 index 0000000..1ebe58a --- /dev/null +++ b/crypto/jitterentropy.c @@ -0,0 +1,909 @@ +/* + * Non-physical true random number generator based on timing jitter. 
+ * + * Copyright Stephan Mueller <smueller@chronox.de>, 2014 + * + * Design + * ====== + * + * See http://www.chronox.de/jent.html + * + * License + * ======= + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * ALTERNATIVELY, this product may be distributed under the terms of + * the GNU General Public License, in which case the provisions of the GPL2 are + * required INSTEAD OF the above restrictions. (This clause is + * necessary due to a potential bad interaction between the GPL and + * the restrictions contained in a BSD-style copyright.) + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ */ + +/* + * This Jitterentropy RNG is based on the jitterentropy library + * version 1.1.0 provided at http://www.chronox.de/jent.html + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/fips.h> +#include <linux/time.h> +#include <linux/crypto.h> +#include <crypto/internal/rng.h> + +#ifdef __OPTIMIZE__ + #error "The CPU Jitter random number generator must not be compiled with optimizations. See documentation. Use the compiler switch -O0 for compiling jitterentropy.c." +#endif + +/* The entropy pool */ +struct rand_data { + /* all data values that are vital to maintain the security + * of the RNG are marked as SENSITIVE. A user must not + * access that information while the RNG executes its loops to + * calculate the next random value. */ + __u64 data; /* SENSITIVE Actual random number */ + __u64 old_data; /* SENSITIVE Previous random number */ + __u64 prev_time; /* SENSITIVE Previous time stamp */ +#define DATA_SIZE_BITS ((sizeof(__u64)) * 8) + __u64 last_delta; /* SENSITIVE stuck test */ + __s64 last_delta2; /* SENSITIVE stuck test */ + unsigned int stuck:1; /* Time measurement stuck */ + unsigned int osr; /* Oversample rate */ + unsigned int stir:1; /* Post-processing stirring */ + unsigned int disable_unbias:1; /* Deactivate Von-Neuman unbias */ +#define JENT_MEMORY_BLOCKS 64 +#define JENT_MEMORY_BLOCKSIZE 32 +#define JENT_MEMORY_ACCESSLOOPS 128 +#define JENT_MEMORY_SIZE (JENT_MEMORY_BLOCKS*JENT_MEMORY_BLOCKSIZE) + unsigned char *mem; /* Memory access location with size of + * memblocks * memblocksize */ + unsigned int memlocation; /* Pointer to byte in *mem */ + unsigned int memblocks; /* Number of memory blocks in *mem */ + unsigned int memblocksize; /* Size of one memory block in bytes */ + unsigned int memaccessloops; /* Number of memory accesses per random + * bit generation */ +}; + +/* Flags that can be used to initialize the RNG */ +#define JENT_DISABLE_STIR (1<<0) /* Disable stirring the entropy pool 
*/ +#define JENT_DISABLE_UNBIAS (1<<1) /* Disable the Von Neumann unbiaser */ +#define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more + * entropy, saves MEMORY_SIZE RAM for + * entropy collector */ + +#define DRIVER_NAME "jitterentropy" + +/* -- error codes for init function -- */ +#define JENT_ENOTIME 1 /* Timer service not available */ +#define JENT_ECOARSETIME 2 /* Timer too coarse for RNG */ +#define JENT_ENOMONOTONIC 3 /* Timer is not monotonic increasing */ +#define JENT_EMINVARIATION 4 /* Timer variations too small for RNG */ +#define JENT_EVARVAR 5 /* Timer does not produce variations of + * variations (2nd derivative of time is + * zero). */ +#define JENT_EMINVARVAR 6 /* Timer variations of variations are too + * small. */ + +/*************************************************************************** + * Helper functions + ***************************************************************************/ + +static inline void jent_get_nstime(__u64 *out) +{ + struct timespec ts; + __u64 tmp = 0; + + tmp = random_get_entropy(); + + /* + * If random_get_entropy does not return a value (which is possible on, + * for example, MIPS), invoke __getnstimeofday + * hoping that there are timers we can work with. + * + * The list of available timers can be obtained from + * /sys/devices/system/clocksource/clocksource0/available_clocksource + * and are registered with clocksource_register() + */ + if ((0 == tmp) && +#ifndef MODULE + (0 == timekeeping_valid_for_hres()) && +#endif + (0 == __getnstimeofday(&ts))) { + tmp = ts.tv_sec; + tmp = tmp << 32; + tmp = tmp | ts.tv_nsec; + } + + *out = tmp; +} + + +/** + * Update of the loop count used for the next round of + * an entropy collection. 
+ * + * Input: + * @ec entropy collector struct -- may be NULL + * @bits is the number of low bits of the timer to consider + * @min is the number of bits we shift the timer value to the right at + * the end to make sure we have a guaranteed minimum value + * + * @return Newly calculated loop counter + */ +static __u64 jent_loop_shuffle(struct rand_data *ec, + unsigned int bits, unsigned int min) +{ + __u64 time = 0; + __u64 shuffle = 0; + unsigned int i = 0; + unsigned int mask = (1<<bits) - 1; + + jent_get_nstime(&time); + /* + * mix the current state of the random number into the shuffle + * calculation to balance that shuffle a bit more + */ + if (ec) + time ^= ec->data; + /* + * we fold the time value as much as possible to ensure that as many + * bits of the time stamp are included as possible + */ + for (i = 0; (DATA_SIZE_BITS / bits) > i; i++) { + shuffle ^= time & mask; + time = time >> bits; + } + + /* + * We add a lower boundary value to ensure we have a minimum + * RNG loop count. + */ + return (shuffle + (1<<min)); +} + +/*************************************************************************** + * Noise sources + ***************************************************************************/ + +/** + * CPU Jitter noise source -- this is the noise source based on the CPU + * execution time jitter + * + * This function folds the time into one bit units by iterating + * through the DATA_SIZE_BITS bit time value as follows: assume our time value + * is 0xabcd + * 1st loop, 1st shift generates 0xd000 + * 1st loop, 2nd shift generates 0x000d + * 2nd loop, 1st shift generates 0xcd00 + * 2nd loop, 2nd shift generates 0x000c + * 3rd loop, 1st shift generates 0xbcd0 + * 3rd loop, 2nd shift generates 0x000b + * 4th loop, 1st shift generates 0xabcd + * 4th loop, 2nd shift generates 0x000a + * Now, the values at the end of the 2nd shifts are XORed together. + * + * The code is deliberately inefficient and shall stay that way. 
This function + * is the root cause why the code shall be compiled without optimization. This + * function not only acts as folding operation, but this function's execution + * is used to measure the CPU execution time jitter. Any change to the loop in + * this function implies that careful retesting must be done. + * + * Input: + * @ec entropy collector struct -- may be NULL + * @time time stamp to be folded + * @loop_cnt if a value not equal to 0 is set, use the given value as number of + * loops to perform the folding + * + * Output: + * @folded result of folding operation + * + * @return Number of loops the folding operation is performed + */ +static __u64 jent_fold_time(struct rand_data *ec, __u64 time, + __u64 *folded, __u64 loop_cnt) +{ + unsigned int i; + __u64 j = 0; + __u64 new = 0; +#define MAX_FOLD_LOOP_BIT 4 +#define MIN_FOLD_LOOP_BIT 0 + __u64 fold_loop_cnt = + jent_loop_shuffle(ec, MAX_FOLD_LOOP_BIT, MIN_FOLD_LOOP_BIT); + + /* + * testing purposes -- allow test app to set the counter, not + * needed during runtime + */ + if (loop_cnt) + fold_loop_cnt = loop_cnt; + for (j = 0; j < fold_loop_cnt; j++) { + new = 0; + for (i = 1; (DATA_SIZE_BITS) >= i; i++) { + __u64 tmp = time << (DATA_SIZE_BITS - i); + + tmp = tmp >> (DATA_SIZE_BITS - 1); + new ^= tmp; + } + } + *folded = new; + return fold_loop_cnt; +} + +/** + * Memory Access noise source -- this is a noise source based on variations in + * memory access times + * + * This function performs memory accesses which will add to the timing + * variations due to an unknown amount of CPU wait states that need to be + * added when accessing memory. The memory size should be larger than the L1 + * caches as outlined in the documentation and the associated testing. + * + * The L1 cache has a very high bandwidth, albeit its access rate is usually + * slower than accessing CPU registers. Therefore, L1 accesses only add minimal + * variations as the CPU has hardly to wait. 
Starting with L2, significant + * variations are added because L2 typically does not belong to the CPU any more + * and therefore a wider range of CPU wait states is necessary for accesses. + * L3 and real memory accesses have even a wider range of wait states. However, + * to reliably access either L3 or memory, the ec->mem memory must be quite + * large which is usually not desirable. + * + * Input: + * @ec Reference to the entropy collector with the memory access data -- if + * the reference to the memory block to be accessed is NULL, this noise + * source is disabled + * @loop_cnt if a value not equal to 0 is set, use the given value as number of + * loops to perform the folding + * + * @return Number of memory access operations + */ +static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt) +{ + unsigned char *tmpval = NULL; + unsigned int wrap = 0; + __u64 i = 0; +#define MAX_ACC_LOOP_BIT 7 +#define MIN_ACC_LOOP_BIT 0 + __u64 acc_loop_cnt = + jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT); + + if (NULL == ec || NULL == ec->mem) + return 0; + wrap = ec->memblocksize * ec->memblocks; + + /* + * testing purposes -- allow test app to set the counter, not + * needed during runtime + */ + if (loop_cnt) + acc_loop_cnt = loop_cnt; + + for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) { + tmpval = ec->mem + ec->memlocation; + /* + * memory access: just add 1 to one byte, + * wrap at 255 -- memory access implies read + * from and write to memory location + */ + *tmpval = (*tmpval + 1) & 0xff; + /* + * Addition of memblocksize - 1 to pointer + * with wrap around logic to ensure that every + * memory location is hit evenly + */ + ec->memlocation = ec->memlocation + ec->memblocksize - 1; + ec->memlocation = ec->memlocation % wrap; + } + return i; +} + +/*************************************************************************** + * Start of entropy processing logic + 
***************************************************************************/ + +/** + * Stuck test by checking the: + * 1st derivative of the jitter measurement (time delta) + * 2nd derivative of the jitter measurement (delta of time deltas) + * 3rd derivative of the jitter measurement (delta of delta of time deltas) + * + * All values must always be non-zero. + * + * Input: + * @ec Reference to entropy collector + * @current_delta Jitter time delta + * + * Output: + * ec->stuck is set to 1 if the jitter measurement is stuck (reject bit); + * it is left untouched for a good measurement + */ +static void jent_stuck(struct rand_data *ec, __u64 current_delta) +{ + __s64 delta2 = ec->last_delta - current_delta; + __s64 delta3 = delta2 - ec->last_delta2; + + ec->last_delta = current_delta; + ec->last_delta2 = delta2; + + if (!current_delta || !delta2 || !delta3) + ec->stuck = 1; +} + +/** + * This is the heart of the entropy generation: calculate time deltas and + * use the CPU jitter in the time deltas. The jitter is folded into one + * bit. You can call this function the "random bit generator" as it + * produces one random bit per invocation. + * + * WARNING: ensure that ->prev_time is primed before using the output + * of this function! This can be done by calling this function + * and not using its result. + * + * Input: + * @ec Reference to entropy collector + * + * @return One random bit + */ +static __u64 jent_measure_jitter(struct rand_data *ec) +{ + __u64 time = 0; + __u64 data = 0; + __u64 current_delta = 0; + + /* Invoke one noise source before time measurement to add variations */ + jent_memaccess(ec, 0); + + /* + * Get time stamp and calculate time delta to previous + * invocation to measure the timing variations + */ + jent_get_nstime(&time); + current_delta = time - ec->prev_time; + ec->prev_time = time; + + /* Now call the next noise sources which also folds the data */ + jent_fold_time(ec, current_delta, &data, 0); + + /* + * Check whether we have a stuck measurement. 
The enforcement + * is performed after the stuck value has been mixed into the + * entropy pool. + */ + jent_stuck(ec, current_delta); + + return data; +} + +/** + * Von Neuman unbias as explained in RFC 4086 section 4.2. As shown in the + * documentation of that RNG, the bits from jent_measure_jitter are considered + * independent which implies that the Von Neuman unbias operation is applicable. + * A proof of the Von-Neumann unbias operation to remove skews is given in the + * document "A proposal for: Functionality classes for random number + * generators", version 2.0 by Werner Schindler, section 5.4.1. + * + * Input: + * @entropy_collector Reference to entropy collector + * + * @return One random bit + */ +static __u64 jent_unbiased_bit(struct rand_data *entropy_collector) +{ + do { + __u64 a = jent_measure_jitter(entropy_collector); + __u64 b = jent_measure_jitter(entropy_collector); + + if (a == b) + continue; + if (1 == a) + return 1; + else + return 0; + } while (1); +} + +/** + * Shuffle the pool a bit by mixing some value with a bijective function (XOR) + * into the pool. + * + * The function generates a mixer value that depends on the bits set and the + * location of the set bits in the random number generated by the entropy + * source. Therefore, based on the generated random number, this mixer value + * can have 2**64 different values. That mixer value is initialized with the + * first two SHA-1 constants. After obtaining the mixer value, it is XORed into + * the random number. + * + * The mixer value is not assumed to contain any entropy. But due to the XOR + * operation, it can also not destroy any entropy present in the entropy pool. 
+ * + * Input: + * @entropy_collector Reference to entropy collector + */ +static void jent_stir_pool(struct rand_data *entropy_collector) +{ + /* + * to shut up GCC on 32 bit, we have to initialize the 64 variable + * with two 32 bit variables + */ + union c { + __u64 u64; + __u32 u32[2]; + }; + /* + * This constant is derived from the first two 32 bit initialization + * vectors of SHA-1 as defined in FIPS 180-4 section 5.3.1 + */ + union c constant; + /* + * The start value of the mixer variable is derived from the third + * and fourth 32 bit initialization vector of SHA-1 as defined in + * FIPS 180-4 section 5.3.1 + */ + union c mixer; + unsigned int i = 0; + + /* + * Store the SHA-1 constants in reverse order to make up the 64 bit + * value -- this applies to a little endian system, on a big endian + * system, it reverses as expected. But this really does not matter + * as we do not rely on the specific numbers. We just pick the SHA-1 + * constants as they have a good mix of bit set and unset. 
+ */ + constant.u32[1] = 0x67452301; + constant.u32[0] = 0xefcdab89; + mixer.u32[1] = 0x98badcfe; + mixer.u32[0] = 0x10325476; + + for (i = 0; i < DATA_SIZE_BITS; i++) { + /* + * get the i-th bit of the input random number and only XOR + * the constant into the mixer value when that bit is set + */ + if ((entropy_collector->data >> i) & 1) + mixer.u64 ^= constant.u64; + mixer.u64 = rol64(mixer.u64, 1); + } + entropy_collector->data ^= mixer.u64; +} + +/** + * Generator of one 64 bit random number + * Function fills rand_data->data + * + * Input: + * @ec Reference to entropy collector + */ +static void jent_gen_entropy(struct rand_data *ec) +{ + unsigned int k = 0; + + /* priming of the ->prev_time value */ + jent_measure_jitter(ec); + + while (1) { + __u64 data = 0; + + if (ec->disable_unbias == 1) + data = jent_measure_jitter(ec); + else + data = jent_unbiased_bit(ec); + + /* enforcement of the jent_stuck test */ + if (ec->stuck) { + /* + * We only mix in the bit considered not appropriate + * without the LFSR. The reason is that if we apply + * the LFSR and we do not rotate, the 2nd bit with LFSR + * will cancel out the first LFSR application on the + * bad bit. + * + * And we do not rotate as we apply the next bit to the + * current bit location again. + */ + ec->data ^= data; + ec->stuck = 0; + continue; + } + + /* + * Fibonacci LFSR with polynomial of + * x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is + * primitive according to + * http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf + * (the shift values are the polynomial exponents minus one + * due to counting bits from 0 to 63). As the current + * position is always the LSB, the polynomial only needs + * to shift data in from the left without wrap. 
+ */ + ec->data ^= data; + ec->data ^= ((ec->data >> 63) & 1); + ec->data ^= ((ec->data >> 60) & 1); + ec->data ^= ((ec->data >> 55) & 1); + ec->data ^= ((ec->data >> 30) & 1); + ec->data ^= ((ec->data >> 27) & 1); + ec->data ^= ((ec->data >> 22) & 1); + ec->data = rol64(ec->data, 1); + + /* + * We multiply the loop value with ->osr to obtain the + * oversampling rate requested by the caller + */ + if (++k >= (DATA_SIZE_BITS * ec->osr)) + break; + } + if (ec->stir) + jent_stir_pool(ec); +} + +/** + * The continuous test required by FIPS 140-2 -- the function automatically + * primes the test if needed. + * + * The function has no return value: + * it does nothing unless fips_enabled is set + * it calls panic() if two consecutive outputs are identical + */ +static void jent_fips_test(struct rand_data *ec) +{ + if (!fips_enabled) + return; + + /* prime the FIPS test */ + if (!ec->old_data) { + ec->old_data = ec->data; + jent_gen_entropy(ec); + } + + if (ec->data == ec->old_data) + panic(DRIVER_NAME ": Duplicate output detected\n"); + + ec->old_data = ec->data; +} + + +/** + * Entry function: Obtain entropy for the caller. + * + * This function invokes the entropy gathering logic as often to generate + * as many bytes as requested by the caller. The entropy gathering logic + * creates 64 bit per invocation. + * + * This function truncates the last 64 bit entropy value output to the exact + * size specified by the caller. 
+ * + * Input: + * @ec Reference to entropy collector + * @data pointer to buffer for storing random data -- buffer must already + * exist + * @len size of the buffer, specifying also the requested number of random + * bytes + * + * @return 0 when request is fulfilled or a negative error code + * + * The following error codes can occur: + * -EINVAL @ec is NULL + */ +static ssize_t jent_read_entropy(struct rand_data *ec, u8 *data, size_t len) +{ + u8 *p = data; + + if (!ec) + return -EINVAL; + + while (0 < len) { + size_t tocopy; + + jent_gen_entropy(ec); + jent_fips_test(ec); + if ((DATA_SIZE_BITS / 8) < len) + tocopy = (DATA_SIZE_BITS / 8); + else + tocopy = len; + memcpy(p, &ec->data, tocopy); + + len -= tocopy; + p += tocopy; + } + + return 0; +} + +/*************************************************************************** + * Initialization logic + ***************************************************************************/ + +static struct rand_data *jent_entropy_collector_alloc(unsigned int osr, + unsigned int flags) +{ + struct rand_data *entropy_collector; + + entropy_collector = kzalloc(sizeof(struct rand_data), GFP_KERNEL); + if (!entropy_collector) + return NULL; + + if (!(flags & JENT_DISABLE_MEMORY_ACCESS)) { + /* Allocate memory for adding variations based on memory + * access + */ + entropy_collector->mem = kzalloc(JENT_MEMORY_SIZE, GFP_KERNEL); + if (!entropy_collector->mem) { + kfree(entropy_collector); + return NULL; + } + entropy_collector->memblocksize = JENT_MEMORY_BLOCKSIZE; + entropy_collector->memblocks = JENT_MEMORY_BLOCKS; + entropy_collector->memaccessloops = JENT_MEMORY_ACCESSLOOPS; + } + + /* verify and set the oversampling rate */ + if (0 == osr) + osr = 1; /* minimum sampling rate is 1 */ + entropy_collector->osr = osr; + + entropy_collector->stir = 1; + if (flags & JENT_DISABLE_STIR) + entropy_collector->stir = 0; + if (flags & JENT_DISABLE_UNBIAS) + entropy_collector->disable_unbias = 1; + + /* fill the data pad with non-zero 
values */ + jent_gen_entropy(entropy_collector); + + return entropy_collector; +} + +static void jent_entropy_collector_free(struct rand_data *entropy_collector) +{ + if (entropy_collector->mem) + kzfree(entropy_collector->mem); + entropy_collector->mem = NULL; + if (entropy_collector) + kzfree(entropy_collector); + entropy_collector = NULL; +} + +static int jent_entropy_init(void) +{ + int i; + __u64 delta_sum = 0; + __u64 old_delta = 0; + int time_backwards = 0; + int count_var = 0; + int count_mod = 0; + + /* We could perform statistical tests here, but the problem is + * that we only have a few loop counts to do testing. These + * loop counts may show some slight skew and we produce + * false positives. + * + * Moreover, only old systems show potentially problematic + * jitter entropy that could potentially be caught here. But + * the RNG is intended for hardware that is available or widely + * used, but not old systems that are long out of favor. Thus, + * no statistical tests. + */ + + /* + * We could add a check for system capabilities such as clock_getres or + * check for CONFIG_X86_TSC, but it does not make much sense as the + * following sanity checks verify that we have a high-resolution + * timer. + */ + /* + * TESTLOOPCOUNT needs some loops to identify edge systems. 100 is + * definitely too little. 
+ */ +#define TESTLOOPCOUNT 300 +#define CLEARCACHE 100 + for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) { + __u64 time = 0; + __u64 time2 = 0; + __u64 folded = 0; + __u64 delta = 0; + unsigned int lowdelta = 0; + + jent_get_nstime(&time); + jent_fold_time(NULL, time, &folded, 1<<MIN_FOLD_LOOP_BIT); + jent_get_nstime(&time2); + + /* test whether timer works */ + if (!time || !time2) + return JENT_ENOTIME; + delta = time2 - time; + /* + * test whether timer is fine grained enough to provide + * delta even when called shortly after each other -- this + * implies that we also have a high resolution timer + */ + if (!delta) + return JENT_ECOARSETIME; + + /* + * up to here we did not modify any variable that will be + * evaluated later, but we already performed some work. Thus we + * already have had an impact on the caches, branch prediction, + * etc. with the goal to clear it to get the worst case + * measurements. + */ + if (CLEARCACHE > i) + continue; + + /* test whether we have an increasing timer */ + if (!(time2 > time)) + time_backwards++; + + /* + * Avoid modulo of 64 bit integer to allow code to compile + * on 32 bit architectures. + */ + lowdelta = time2 - time; + if (!(lowdelta % 100)) + count_mod++; + + /* + * ensure that we have a varying delta timer which is necessary + * for the calculation of entropy -- perform this check + * only after the first loop is executed as we need to prime + * the old_data value + */ + if (i) { + if (delta != old_delta) + count_var++; + if (delta > old_delta) + delta_sum += (delta - old_delta); + else + delta_sum += (old_delta - delta); + } + old_delta = delta; + } + + /* + * we allow up to three times the time running backwards. + * CLOCK_REALTIME is affected by adjtime and NTP operations. Thus, + * if such an operation just happens to interfere with our test, it + * should not fail. The value of 3 should cover the NTP case being + * performed during our test run. 
+ */ + if (3 < time_backwards) + return JENT_ENOMONOTONIC; + /* Error if the time variances are always identical */ + if (!delta_sum) + return JENT_EVARVAR; + + /* + * Variations of deltas of time must on average be larger + * than 1 to ensure the entropy estimation + * implied with 1 is preserved + */ + if (delta_sum <= 1) + return JENT_EMINVARVAR; + + /* + * Ensure that we have variations in the time stamp below 10 for at + * least 10% of all checks -- on some platforms, the counter + * increments in multiples of 100, but not always + */ + if ((TESTLOOPCOUNT/10 * 9) < count_mod) + return JENT_ECOARSETIME; + + return 0; +} + +/*************************************************************************** + * Kernel crypto API interface + ***************************************************************************/ + +struct jitterentropy { + spinlock_t jent_lock; + struct rand_data *entropy_collector; +}; + +static int jent_kcapi_init(struct crypto_tfm *tfm) +{ + struct jitterentropy *rng = crypto_tfm_ctx(tfm); + int ret = 0; + + rng->entropy_collector = jent_entropy_collector_alloc(1, 0); + if (!rng->entropy_collector) + ret = -ENOMEM; + + spin_lock_init(&rng->jent_lock); + return ret; +} + +static void jent_kcapi_cleanup(struct crypto_tfm *tfm) +{ + struct jitterentropy *rng = crypto_tfm_ctx(tfm); + + spin_lock(&rng->jent_lock); + if (rng->entropy_collector) + jent_entropy_collector_free(rng->entropy_collector); + rng->entropy_collector = NULL; + spin_unlock(&rng->jent_lock); +} + +static int jent_kcapi_random(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) +{ + struct jitterentropy *rng = crypto_rng_ctx(tfm); + int ret = 0; + + spin_lock(&rng->jent_lock); + ret = jent_read_entropy(rng->entropy_collector, rdata, dlen); + spin_unlock(&rng->jent_lock); + + return ret; +} + +static int jent_kcapi_reset(struct crypto_rng *tfm, + const u8 *seed, unsigned int slen) +{ + return 0; +} + +static struct rng_alg jent_alg = { + 
.generate = jent_kcapi_random, + .seed = jent_kcapi_reset, + .seedsize = 0, + .base = { + .cra_name = "jitterentropy_rng", + .cra_driver_name = "jitterentropy_rng", + .cra_priority = 100, + .cra_ctxsize = sizeof(struct jitterentropy), + .cra_module = THIS_MODULE, + .cra_init = jent_kcapi_init, + .cra_exit = jent_kcapi_cleanup, + + } +}; + +static int __init jent_mod_init(void) +{ + int ret = 0; + + ret = jent_entropy_init(); + if (ret) { + pr_info(DRIVER_NAME ": Initialization failed with host not compliant with requirements: %d\n", ret); + return -EFAULT; + } + return crypto_register_rng(&jent_alg); +} + +static void __exit jent_mod_exit(void) +{ + crypto_unregister_rng(&jent_alg); +} + +module_init(jent_mod_init); +module_exit(jent_mod_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>"); +MODULE_DESCRIPTION("Non-physical True Random Number Generator based on CPU Jitter"); +MODULE_ALIAS_CRYPTO("jitterentropy_rng"); diff --git a/crypto/krng.c b/crypto/krng.c index 0224841..40ed78e 100644 --- a/crypto/krng.c +++ b/crypto/krng.c @@ -16,31 +16,27 @@ #include <linux/module.h> #include <linux/random.h> -static int krng_get_random(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen) +static int krng_generate(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *rdata, unsigned int dlen) { get_random_bytes(rdata, dlen); return 0; } -static int krng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +static int krng_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { return 0; } -static struct crypto_alg krng_alg = { - .cra_name = "stdrng", - .cra_driver_name = "krng", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_RNG, - .cra_ctxsize = 0, - .cra_type = &crypto_rng_type, - .cra_module = THIS_MODULE, - .cra_u = { - .rng = { - .rng_make_random = krng_get_random, - .rng_reset = krng_reset, - .seedsize = 0, - } +static struct rng_alg krng_alg = { + .generate = krng_generate, + .seed = 
krng_seed, + .base = { + .cra_name = "stdrng", + .cra_driver_name = "krng", + .cra_priority = 200, + .cra_module = THIS_MODULE, } }; @@ -48,13 +44,12 @@ static struct crypto_alg krng_alg = { /* Module initalization */ static int __init krng_mod_init(void) { - return crypto_register_alg(&krng_alg); + return crypto_register_rng(&krng_alg); } static void __exit krng_mod_fini(void) { - crypto_unregister_alg(&krng_alg); - return; + crypto_unregister_rng(&krng_alg); } module_init(krng_mod_init); diff --git a/crypto/md5.c b/crypto/md5.c index 36f5e5b..33d17e9 100644 --- a/crypto/md5.c +++ b/crypto/md5.c @@ -51,10 +51,10 @@ static int md5_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = 0x67452301; - mctx->hash[1] = 0xefcdab89; - mctx->hash[2] = 0x98badcfe; - mctx->hash[3] = 0x10325476; + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; mctx->byte_count = 0; return 0; diff --git a/crypto/pcompress.c b/crypto/pcompress.c index 7140fe7..7a13b40 100644 --- a/crypto/pcompress.c +++ b/crypto/pcompress.c @@ -38,11 +38,6 @@ static int crypto_pcomp_init(struct crypto_tfm *tfm, u32 type, u32 mask) return 0; } -static unsigned int crypto_pcomp_extsize(struct crypto_alg *alg) -{ - return alg->cra_ctxsize; -} - static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm) { return 0; @@ -77,7 +72,7 @@ static void crypto_pcomp_show(struct seq_file *m, struct crypto_alg *alg) } static const struct crypto_type crypto_pcomp_type = { - .extsize = crypto_pcomp_extsize, + .extsize = crypto_alg_extsize, .init = crypto_pcomp_init, .init_tfm = crypto_pcomp_init_tfm, #ifdef CONFIG_PROC_FS diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index c305d41..ff174b6 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -20,6 +20,7 @@ #include <crypto/algapi.h> #include <crypto/internal/aead.h> +#include <linux/atomic.h> #include <linux/err.h> #include <linux/init.h> #include <linux/module.h> @@ -60,8 +61,8 @@ 
static struct padata_pcrypt pdecrypt; static struct kset *pcrypt_kset; struct pcrypt_instance_ctx { - struct crypto_spawn spawn; - unsigned int tfm_count; + struct crypto_aead_spawn spawn; + atomic_t tfm_count; }; struct pcrypt_aead_ctx { @@ -278,9 +279,8 @@ static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm) struct pcrypt_aead_ctx *ctx = crypto_tfm_ctx(tfm); struct crypto_aead *cipher; - ictx->tfm_count++; - - cpu_index = ictx->tfm_count % cpumask_weight(cpu_online_mask); + cpu_index = (unsigned int)atomic_inc_return(&ictx->tfm_count) % + cpumask_weight(cpu_online_mask); ctx->cb_cpu = cpumask_first(cpu_online_mask); for (cpu = 0; cpu < cpu_index; cpu++) @@ -292,9 +292,10 @@ static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm) return PTR_ERR(cipher); ctx->child = cipher; - tfm->crt_aead.reqsize = sizeof(struct pcrypt_request) - + sizeof(struct aead_givcrypt_request) - + crypto_aead_reqsize(cipher); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct pcrypt_request) + + sizeof(struct aead_givcrypt_request) + + crypto_aead_reqsize(cipher)); return 0; } @@ -306,57 +307,50 @@ static void pcrypt_aead_exit_tfm(struct crypto_tfm *tfm) crypto_free_aead(ctx->child); } -static struct crypto_instance *pcrypt_alloc_instance(struct crypto_alg *alg) +static int pcrypt_init_instance(struct crypto_instance *inst, + struct crypto_alg *alg) { - struct crypto_instance *inst; - struct pcrypt_instance_ctx *ctx; - int err; - - inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); - if (!inst) { - inst = ERR_PTR(-ENOMEM); - goto out; - } - - err = -ENAMETOOLONG; if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME, "pcrypt(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) - goto out_free_inst; + return -ENAMETOOLONG; memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME); - ctx = crypto_instance_ctx(inst); - err = crypto_init_spawn(&ctx->spawn, alg, inst, - CRYPTO_ALG_TYPE_MASK); - if (err) - goto out_free_inst; - inst->alg.cra_priority = 
alg->cra_priority + 100; inst->alg.cra_blocksize = alg->cra_blocksize; inst->alg.cra_alignmask = alg->cra_alignmask; -out: - return inst; - -out_free_inst: - kfree(inst); - inst = ERR_PTR(err); - goto out; + return 0; } static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb, u32 type, u32 mask) { + struct pcrypt_instance_ctx *ctx; struct crypto_instance *inst; struct crypto_alg *alg; + const char *name; + int err; + + name = crypto_attr_alg_name(tb[1]); + if (IS_ERR(name)) + return ERR_CAST(name); + + inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); + if (!inst) + return ERR_PTR(-ENOMEM); + + ctx = crypto_instance_ctx(inst); + crypto_set_aead_spawn(&ctx->spawn, inst); - alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK)); - if (IS_ERR(alg)) - return ERR_CAST(alg); + err = crypto_grab_aead(&ctx->spawn, name, 0, 0); + if (err) + goto out_free_inst; - inst = pcrypt_alloc_instance(alg); - if (IS_ERR(inst)) - goto out_put_alg; + alg = crypto_aead_spawn_alg(&ctx->spawn); + err = pcrypt_init_instance(inst, alg); + if (err) + goto out_drop_aead; inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC; inst->alg.cra_type = &crypto_aead_type; @@ -376,9 +370,15 @@ static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb, inst->alg.cra_aead.decrypt = pcrypt_aead_decrypt; inst->alg.cra_aead.givencrypt = pcrypt_aead_givencrypt; -out_put_alg: - crypto_mod_put(alg); +out: return inst; + +out_drop_aead: + crypto_drop_aead(&ctx->spawn); +out_free_inst: + kfree(inst); + inst = ERR_PTR(err); + goto out; } static struct crypto_instance *pcrypt_alloc(struct rtattr **tb) @@ -401,7 +401,7 @@ static void pcrypt_free(struct crypto_instance *inst) { struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst); - crypto_drop_spawn(&ctx->spawn); + crypto_drop_aead(&ctx->spawn); kfree(inst); } diff --git a/crypto/proc.c b/crypto/proc.c index 4ffe73b..2cc10c9 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -20,47 +20,8 @@ #include 
<linux/rwsem.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> -#include <linux/sysctl.h> #include "internal.h" -#ifdef CONFIG_CRYPTO_FIPS -static struct ctl_table crypto_sysctl_table[] = { - { - .procname = "fips_enabled", - .data = &fips_enabled, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec - }, - {} -}; - -static struct ctl_table crypto_dir_table[] = { - { - .procname = "crypto", - .mode = 0555, - .child = crypto_sysctl_table - }, - {} -}; - -static struct ctl_table_header *crypto_sysctls; - -static void crypto_proc_fips_init(void) -{ - crypto_sysctls = register_sysctl_table(crypto_dir_table); -} - -static void crypto_proc_fips_exit(void) -{ - if (crypto_sysctls) - unregister_sysctl_table(crypto_sysctls); -} -#else -#define crypto_proc_fips_init() -#define crypto_proc_fips_exit() -#endif - static void *c_start(struct seq_file *m, loff_t *pos) { down_read(&crypto_alg_sem); @@ -148,11 +109,9 @@ static const struct file_operations proc_crypto_ops = { void __init crypto_init_proc(void) { proc_create("crypto", 0, NULL, &proc_crypto_ops); - crypto_proc_fips_init(); } void __exit crypto_exit_proc(void) { - crypto_proc_fips_exit(); remove_proc_entry("crypto", NULL); } diff --git a/crypto/rng.c b/crypto/rng.c index e0a25c2..1315505 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -4,6 +4,7 @@ * RNG operations. 
* * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com> + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -24,12 +25,19 @@ #include <linux/cryptouser.h> #include <net/netlink.h> +#include "internal.h" + static DEFINE_MUTEX(crypto_default_rng_lock); struct crypto_rng *crypto_default_rng; EXPORT_SYMBOL_GPL(crypto_default_rng); static int crypto_default_rng_refcnt; -static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) +static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_rng, base); +} + +int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { u8 *buf = NULL; int err; @@ -43,21 +51,23 @@ static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen) seed = buf; } - err = crypto_rng_alg(tfm)->rng_reset(tfm, seed, slen); + err = crypto_rng_alg(tfm)->seed(tfm, seed, slen); - kfree(buf); + kzfree(buf); return err; } +EXPORT_SYMBOL_GPL(crypto_rng_reset); -static int crypto_init_rng_ops(struct crypto_tfm *tfm, u32 type, u32 mask) +static int crypto_rng_init_tfm(struct crypto_tfm *tfm) { - struct rng_alg *alg = &tfm->__crt_alg->cra_rng; - struct rng_tfm *ops = &tfm->crt_rng; + return 0; +} - ops->rng_gen_random = alg->rng_make_random; - ops->rng_reset = rngapi_reset; +static unsigned int seedsize(struct crypto_alg *alg) +{ + struct rng_alg *ralg = container_of(alg, struct rng_alg, base); - return 0; + return ralg->seedsize; } #ifdef CONFIG_NET @@ -67,7 +77,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) strncpy(rrng.type, "rng", sizeof(rrng.type)); - rrng.seedsize = alg->cra_rng.seedsize; + rrng.seedsize = seedsize(alg); if (nla_put(skb, CRYPTOCFGA_REPORT_RNG, sizeof(struct crypto_report_rng), &rrng)) @@ -89,24 +99,27 @@ static void 
crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : rng\n"); - seq_printf(m, "seedsize : %u\n", alg->cra_rng.seedsize); -} - -static unsigned int crypto_rng_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; + seq_printf(m, "seedsize : %u\n", seedsize(alg)); } -const struct crypto_type crypto_rng_type = { - .ctxsize = crypto_rng_ctxsize, - .init = crypto_init_rng_ops, +static const struct crypto_type crypto_rng_type = { + .extsize = crypto_alg_extsize, + .init_tfm = crypto_rng_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_rng_show, #endif .report = crypto_rng_report, + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_MASK, + .type = CRYPTO_ALG_TYPE_RNG, + .tfmsize = offsetof(struct crypto_rng, base), }; -EXPORT_SYMBOL_GPL(crypto_rng_type); + +struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_rng_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_rng); int crypto_get_default_rng(void) { @@ -150,5 +163,55 @@ void crypto_put_default_rng(void) } EXPORT_SYMBOL_GPL(crypto_put_default_rng); +int crypto_register_rng(struct rng_alg *alg) +{ + struct crypto_alg *base = &alg->base; + + if (alg->seedsize > PAGE_SIZE / 8) + return -EINVAL; + + base->cra_type = &crypto_rng_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_RNG; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_rng); + +void crypto_unregister_rng(struct rng_alg *alg) +{ + crypto_unregister_alg(&alg->base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_rng); + +int crypto_register_rngs(struct rng_alg *algs, int count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = crypto_register_rng(algs + i); + if (ret) + goto err; + } + + return 0; + +err: + for (--i; i >= 0; --i) + crypto_unregister_rng(algs + i); + + return ret; +} 
+EXPORT_SYMBOL_GPL(crypto_register_rngs); + +void crypto_unregister_rngs(struct rng_alg *algs, int count) +{ + int i; + + for (i = count - 1; i >= 0; --i) + crypto_unregister_rng(algs + i); +} +EXPORT_SYMBOL_GPL(crypto_unregister_rngs); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Random Number Generator"); diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c index 3bd749c..8690324 100644 --- a/crypto/scatterwalk.c +++ b/crypto/scatterwalk.c @@ -104,22 +104,18 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, unsigned int start, unsigned int nbytes, int out) { struct scatter_walk walk; - unsigned int offset = 0; + struct scatterlist tmp[2]; if (!nbytes) return; - for (;;) { - scatterwalk_start(&walk, sg); - - if (start < offset + sg->length) - break; + sg = scatterwalk_ffwd(tmp, sg, start); - offset += sg->length; - sg = sg_next(sg); - } + if (sg_page(sg) == virt_to_page(buf) && + sg->offset == offset_in_page(buf)) + return; - scatterwalk_advance(&walk, start - offset); + scatterwalk_start(&walk, sg); scatterwalk_copychunks(buf, &walk, nbytes, out); scatterwalk_done(&walk, out, 0); } @@ -146,3 +142,25 @@ int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes) return n; } EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen); + +struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], + struct scatterlist *src, + unsigned int len) +{ + for (;;) { + if (!len) + return src; + + if (src->length > len) + break; + + len -= src->length; + src = sg_next(src); + } + + sg_set_page(dst, sg_page(src), src->length - len, src->offset + len); + scatterwalk_crypto_chain(dst, sg_next(src), 0, 2); + + return dst; +} +EXPORT_SYMBOL_GPL(scatterwalk_ffwd); diff --git a/crypto/seqiv.c b/crypto/seqiv.c index b7bb9a2..127970a 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -15,7 +15,9 @@ #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> +#include <crypto/null.h> #include <crypto/rng.h> +#include <crypto/scatterwalk.h> #include 
<linux/err.h> #include <linux/init.h> #include <linux/kernel.h> @@ -24,11 +26,41 @@ #include <linux/spinlock.h> #include <linux/string.h> +struct seqniv_request_ctx { + struct scatterlist dst[2]; + struct aead_request subreq; +}; + struct seqiv_ctx { spinlock_t lock; u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); }; +struct seqiv_aead_ctx { + struct crypto_aead *child; + spinlock_t lock; + struct crypto_blkcipher *null; + u8 salt[] __attribute__ ((aligned(__alignof__(u32)))); +}; + +static void seqiv_free(struct crypto_instance *inst); + +static int seqiv_aead_setkey(struct crypto_aead *tfm, + const u8 *key, unsigned int keylen) +{ + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(tfm); + + return crypto_aead_setkey(ctx->child, key, keylen); +} + +static int seqiv_aead_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(tfm); + + return crypto_aead_setauthsize(ctx->child, authsize); +} + static void seqiv_complete2(struct skcipher_givcrypt_request *req, int err) { struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req); @@ -81,6 +113,77 @@ static void seqiv_aead_complete(struct crypto_async_request *base, int err) aead_givcrypt_complete(req, err); } +static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err) +{ + struct aead_request *subreq = aead_request_ctx(req); + struct crypto_aead *geniv; + + if (err == -EINPROGRESS) + return; + + if (err) + goto out; + + geniv = crypto_aead_reqtfm(req); + memcpy(req->iv, subreq->iv, crypto_aead_ivsize(geniv)); + +out: + kzfree(subreq->iv); +} + +static void seqiv_aead_encrypt_complete(struct crypto_async_request *base, + int err) +{ + struct aead_request *req = base->data; + + seqiv_aead_encrypt_complete2(req, err); + aead_request_complete(req, err); +} + +static void seqniv_aead_encrypt_complete2(struct aead_request *req, int err) +{ + unsigned int ivsize = 8; + u8 data[20]; + + if (err == -EINPROGRESS) + return; + + /* Swap IV and 
ESP header back to correct order. */ + scatterwalk_map_and_copy(data, req->dst, 0, req->assoclen + ivsize, 0); + scatterwalk_map_and_copy(data + ivsize, req->dst, 0, req->assoclen, 1); + scatterwalk_map_and_copy(data, req->dst, req->assoclen, ivsize, 1); +} + +static void seqniv_aead_encrypt_complete(struct crypto_async_request *base, + int err) +{ + struct aead_request *req = base->data; + + seqniv_aead_encrypt_complete2(req, err); + aead_request_complete(req, err); +} + +static void seqniv_aead_decrypt_complete2(struct aead_request *req, int err) +{ + u8 data[4]; + + if (err == -EINPROGRESS) + return; + + /* Move ESP header back to correct location. */ + scatterwalk_map_and_copy(data, req->dst, 16, req->assoclen - 8, 0); + scatterwalk_map_and_copy(data, req->dst, 8, req->assoclen - 8, 1); +} + +static void seqniv_aead_decrypt_complete(struct crypto_async_request *base, + int err) +{ + struct aead_request *req = base->data; + + seqniv_aead_decrypt_complete2(req, err); + aead_request_complete(req, err); +} + static void seqiv_geniv(struct seqiv_ctx *ctx, u8 *info, u64 seq, unsigned int ivsize) { @@ -186,6 +289,228 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req) return err; } +static int seqiv_aead_encrypt_compat(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + struct seqniv_request_ctx *rctx = aead_request_ctx(req); + struct aead_request *subreq = &rctx->subreq; + struct scatterlist *dst; + crypto_completion_t compl; + void *data; + unsigned int ivsize = 8; + u8 buf[20] __attribute__ ((aligned(__alignof__(u32)))); + int err; + + if (req->cryptlen < ivsize) + return -EINVAL; + + /* ESP AD is at most 12 bytes (ESN). 
*/ + if (req->assoclen > 12) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = seqniv_aead_encrypt_complete; + data = req; + + if (req->src != req->dst) { + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct blkcipher_desc desc = { + .tfm = ctx->null, + }; + + err = crypto_blkcipher_encrypt( + &desc, + scatterwalk_ffwd(dstbuf, req->dst, + req->assoclen + ivsize), + scatterwalk_ffwd(srcbuf, req->src, + req->assoclen + ivsize), + req->cryptlen - ivsize); + if (err) + return err; + } + + dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize); + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, dst, dst, + req->cryptlen - ivsize, req->iv); + aead_request_set_ad(subreq, req->assoclen); + + memcpy(buf, req->iv, ivsize); + crypto_xor(buf, ctx->salt, ivsize); + memcpy(req->iv, buf, ivsize); + + /* Swap order of IV and ESP AD for ICV generation. */ + scatterwalk_map_and_copy(buf + ivsize, req->dst, 0, req->assoclen, 0); + scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 1); + + err = crypto_aead_encrypt(subreq); + seqniv_aead_encrypt_complete2(req, err); + return err; +} + +static int seqiv_aead_encrypt(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + struct aead_request *subreq = aead_request_ctx(req); + crypto_completion_t compl; + void *data; + u8 *info; + unsigned int ivsize = 8; + int err; + + if (req->cryptlen < ivsize) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = req->base.complete; + data = req->base.data; + info = req->iv; + + if (req->src != req->dst) { + struct scatterlist src[2]; + struct scatterlist dst[2]; + struct blkcipher_desc desc = { + .tfm = ctx->null, + }; + + err = crypto_blkcipher_encrypt( + &desc, + scatterwalk_ffwd(dst, req->dst, + req->assoclen + ivsize), + scatterwalk_ffwd(src, req->src, + req->assoclen + ivsize), + 
req->cryptlen - ivsize); + if (err) + return err; + } + + if (unlikely(!IS_ALIGNED((unsigned long)info, + crypto_aead_alignmask(geniv) + 1))) { + info = kmalloc(ivsize, req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: + GFP_ATOMIC); + if (!info) + return -ENOMEM; + + memcpy(info, req->iv, ivsize); + compl = seqiv_aead_encrypt_complete; + data = req; + } + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, req->dst, req->dst, + req->cryptlen - ivsize, info); + aead_request_set_ad(subreq, req->assoclen + ivsize); + + crypto_xor(info, ctx->salt, ivsize); + scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); + + err = crypto_aead_encrypt(subreq); + if (unlikely(info != req->iv)) + seqiv_aead_encrypt_complete2(req, err); + return err; +} + +static int seqiv_aead_decrypt_compat(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + struct seqniv_request_ctx *rctx = aead_request_ctx(req); + struct aead_request *subreq = &rctx->subreq; + struct scatterlist *dst; + crypto_completion_t compl; + void *data; + unsigned int ivsize = 8; + u8 buf[20]; + int err; + + if (req->cryptlen < ivsize + crypto_aead_authsize(geniv)) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = req->base.complete; + data = req->base.data; + + if (req->assoclen > 12) + return -EINVAL; + else if (req->assoclen > 8) { + compl = seqniv_aead_decrypt_complete; + data = req; + } + + if (req->src != req->dst) { + struct scatterlist srcbuf[2]; + struct scatterlist dstbuf[2]; + struct blkcipher_desc desc = { + .tfm = ctx->null, + }; + + err = crypto_blkcipher_encrypt( + &desc, + scatterwalk_ffwd(dstbuf, req->dst, + req->assoclen + ivsize), + scatterwalk_ffwd(srcbuf, req->src, + req->assoclen + ivsize), + req->cryptlen - ivsize); + if (err) + return err; + } + + /* Move ESP AD forward for ICV generation. 
*/ + scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 0); + memcpy(req->iv, buf + req->assoclen, ivsize); + scatterwalk_map_and_copy(buf, req->dst, ivsize, req->assoclen, 1); + + dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize); + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, dst, dst, + req->cryptlen - ivsize, req->iv); + aead_request_set_ad(subreq, req->assoclen); + + err = crypto_aead_decrypt(subreq); + if (req->assoclen > 8) + seqniv_aead_decrypt_complete2(req, err); + return err; +} + +static int seqiv_aead_decrypt(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + struct aead_request *subreq = aead_request_ctx(req); + crypto_completion_t compl; + void *data; + unsigned int ivsize = 8; + + if (req->cryptlen < ivsize + crypto_aead_authsize(geniv)) + return -EINVAL; + + aead_request_set_tfm(subreq, ctx->child); + + compl = req->base.complete; + data = req->base.data; + + aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen - ivsize, req->iv); + aead_request_set_ad(subreq, req->assoclen + ivsize); + + scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0); + if (req->src != req->dst) + scatterwalk_map_and_copy(req->iv, req->dst, + req->assoclen, ivsize, 1); + + return crypto_aead_decrypt(subreq); +} + static int seqiv_givencrypt_first(struct skcipher_givcrypt_request *req) { struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); @@ -232,6 +557,52 @@ unlock: return seqiv_aead_givencrypt(req); } +static int seqiv_aead_encrypt_compat_first(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + int err = 0; + + spin_lock_bh(&ctx->lock); + if (geniv->encrypt != seqiv_aead_encrypt_compat_first) + goto unlock; + + 
geniv->encrypt = seqiv_aead_encrypt_compat; + err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, + crypto_aead_ivsize(geniv)); + +unlock: + spin_unlock_bh(&ctx->lock); + + if (err) + return err; + + return seqiv_aead_encrypt_compat(req); +} + +static int seqiv_aead_encrypt_first(struct aead_request *req) +{ + struct crypto_aead *geniv = crypto_aead_reqtfm(req); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + int err = 0; + + spin_lock_bh(&ctx->lock); + if (geniv->encrypt != seqiv_aead_encrypt_first) + goto unlock; + + geniv->encrypt = seqiv_aead_encrypt; + err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt, + crypto_aead_ivsize(geniv)); + +unlock: + spin_unlock_bh(&ctx->lock); + + if (err) + return err; + + return seqiv_aead_encrypt(req); +} + static int seqiv_init(struct crypto_tfm *tfm) { struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm); @@ -244,34 +615,81 @@ static int seqiv_init(struct crypto_tfm *tfm) return skcipher_geniv_init(tfm); } -static int seqiv_aead_init(struct crypto_tfm *tfm) +static int seqiv_old_aead_init(struct crypto_tfm *tfm) { struct crypto_aead *geniv = __crypto_aead_cast(tfm); struct seqiv_ctx *ctx = crypto_aead_ctx(geniv); spin_lock_init(&ctx->lock); - tfm->crt_aead.reqsize = sizeof(struct aead_request); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_request)); return aead_geniv_init(tfm); } -static struct crypto_template seqiv_tmpl; +static int seqiv_aead_init_common(struct crypto_tfm *tfm, unsigned int reqsize) +{ + struct crypto_aead *geniv = __crypto_aead_cast(tfm); + struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv); + int err; + + spin_lock_init(&ctx->lock); + + crypto_aead_set_reqsize(geniv, sizeof(struct aead_request)); + + ctx->null = crypto_get_default_null_skcipher(); + err = PTR_ERR(ctx->null); + if (IS_ERR(ctx->null)) + goto out; + + err = aead_geniv_init(tfm); + if (err) + goto drop_null; + + ctx->child = geniv->child; + geniv->child = geniv; + +out: + return err; 
-static struct crypto_instance *seqiv_ablkcipher_alloc(struct rtattr **tb) +drop_null: + crypto_put_default_null_skcipher(); + goto out; +} + +static int seqiv_aead_init(struct crypto_tfm *tfm) +{ + return seqiv_aead_init_common(tfm, sizeof(struct aead_request)); +} + +static int seqniv_aead_init(struct crypto_tfm *tfm) +{ + return seqiv_aead_init_common(tfm, sizeof(struct seqniv_request_ctx)); +} + +static void seqiv_aead_exit(struct crypto_tfm *tfm) +{ + struct seqiv_aead_ctx *ctx = crypto_tfm_ctx(tfm); + + crypto_free_aead(ctx->child); + crypto_put_default_null_skcipher(); +} + +static int seqiv_ablkcipher_create(struct crypto_template *tmpl, + struct rtattr **tb) { struct crypto_instance *inst; + int err; - inst = skcipher_geniv_alloc(&seqiv_tmpl, tb, 0, 0); + inst = skcipher_geniv_alloc(tmpl, tb, 0, 0); if (IS_ERR(inst)) - goto out; + return PTR_ERR(inst); - if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) { - skcipher_geniv_free(inst); - inst = ERR_PTR(-EINVAL); - goto out; - } + err = -EINVAL; + if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) + goto free_inst; inst->alg.cra_ablkcipher.givencrypt = seqiv_givencrypt_first; @@ -279,65 +697,174 @@ static struct crypto_instance *seqiv_ablkcipher_alloc(struct rtattr **tb) inst->alg.cra_exit = skcipher_geniv_exit; inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize; + inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx); + + inst->alg.cra_alignmask |= __alignof__(u32) - 1; + + err = crypto_register_instance(tmpl, inst); + if (err) + goto free_inst; out: - return inst; + return err; + +free_inst: + skcipher_geniv_free(inst); + goto out; } -static struct crypto_instance *seqiv_aead_alloc(struct rtattr **tb) +static int seqiv_old_aead_create(struct crypto_template *tmpl, + struct aead_instance *aead) { - struct crypto_instance *inst; - - inst = aead_geniv_alloc(&seqiv_tmpl, tb, 0, 0); - - if (IS_ERR(inst)) - goto out; + struct crypto_instance *inst = aead_crypto_instance(aead); + int err = -EINVAL; - if 
(inst->alg.cra_aead.ivsize < sizeof(u64)) { - aead_geniv_free(inst); - inst = ERR_PTR(-EINVAL); - goto out; - } + if (inst->alg.cra_aead.ivsize < sizeof(u64)) + goto free_inst; inst->alg.cra_aead.givencrypt = seqiv_aead_givencrypt_first; - inst->alg.cra_init = seqiv_aead_init; + inst->alg.cra_init = seqiv_old_aead_init; inst->alg.cra_exit = aead_geniv_exit; inst->alg.cra_ctxsize = inst->alg.cra_aead.ivsize; + inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx); + + err = crypto_register_instance(tmpl, inst); + if (err) + goto free_inst; out: - return inst; + return err; + +free_inst: + aead_geniv_free(aead); + goto out; } -static struct crypto_instance *seqiv_alloc(struct rtattr **tb) +static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct aead_instance *inst; + struct crypto_aead_spawn *spawn; + struct aead_alg *alg; + int err; + + inst = aead_geniv_alloc(tmpl, tb, 0, 0); + + if (IS_ERR(inst)) + return PTR_ERR(inst); + + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; + + if (inst->alg.base.cra_aead.encrypt) + return seqiv_old_aead_create(tmpl, inst); + + err = -EINVAL; + if (inst->alg.ivsize != sizeof(u64)) + goto free_inst; + + spawn = aead_instance_ctx(inst); + alg = crypto_spawn_aead_alg(spawn); + + inst->alg.setkey = seqiv_aead_setkey; + inst->alg.setauthsize = seqiv_aead_setauthsize; + inst->alg.encrypt = seqiv_aead_encrypt_first; + inst->alg.decrypt = seqiv_aead_decrypt; + + inst->alg.base.cra_init = seqiv_aead_init; + inst->alg.base.cra_exit = seqiv_aead_exit; + + inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx); + inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize; + + if (alg->base.cra_aead.encrypt) { + inst->alg.encrypt = seqiv_aead_encrypt_compat_first; + inst->alg.decrypt = seqiv_aead_decrypt_compat; + + inst->alg.base.cra_init = seqniv_aead_init; + inst->alg.base.cra_exit = seqiv_aead_exit; + } + + err = aead_register_instance(tmpl, inst); + if (err) + goto free_inst; + +out: + return err; + 
+free_inst: + aead_geniv_free(inst); + goto out; +} + +static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_attr_type *algt; - struct crypto_instance *inst; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return ERR_CAST(algt); + return PTR_ERR(algt); err = crypto_get_default_rng(); if (err) - return ERR_PTR(err); + return err; if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) - inst = seqiv_ablkcipher_alloc(tb); + err = seqiv_ablkcipher_create(tmpl, tb); else - inst = seqiv_aead_alloc(tb); + err = seqiv_aead_create(tmpl, tb); + + if (err) + crypto_put_default_rng(); + return err; +} + +static int seqniv_create(struct crypto_template *tmpl, struct rtattr **tb) +{ + struct aead_instance *inst; + struct crypto_aead_spawn *spawn; + struct aead_alg *alg; + int err; + + err = crypto_get_default_rng(); + if (err) + return err; + + inst = aead_geniv_alloc(tmpl, tb, 0, 0); + err = PTR_ERR(inst); if (IS_ERR(inst)) goto put_rng; - inst->alg.cra_alignmask |= __alignof__(u32) - 1; - inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx); + err = -EINVAL; + if (inst->alg.ivsize != sizeof(u64)) + goto free_inst; + + spawn = aead_instance_ctx(inst); + alg = crypto_spawn_aead_alg(spawn); + + inst->alg.setkey = seqiv_aead_setkey; + inst->alg.setauthsize = seqiv_aead_setauthsize; + inst->alg.encrypt = seqiv_aead_encrypt_compat_first; + inst->alg.decrypt = seqiv_aead_decrypt_compat; + + inst->alg.base.cra_init = seqniv_aead_init; + inst->alg.base.cra_exit = seqiv_aead_exit; + + inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; + inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx); + inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize; + + err = aead_register_instance(tmpl, inst); + if (err) + goto free_inst; out: - return inst; + return err; +free_inst: + aead_geniv_free(inst); put_rng: crypto_put_default_rng(); goto out; @@ -348,20 +875,42 @@ static void seqiv_free(struct crypto_instance *inst) if 
((inst->alg.cra_flags ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK) skcipher_geniv_free(inst); else - aead_geniv_free(inst); + aead_geniv_free(aead_instance(inst)); crypto_put_default_rng(); } static struct crypto_template seqiv_tmpl = { .name = "seqiv", - .alloc = seqiv_alloc, + .create = seqiv_create, + .free = seqiv_free, + .module = THIS_MODULE, +}; + +static struct crypto_template seqniv_tmpl = { + .name = "seqniv", + .create = seqniv_create, .free = seqiv_free, .module = THIS_MODULE, }; static int __init seqiv_module_init(void) { - return crypto_register_template(&seqiv_tmpl); + int err; + + err = crypto_register_template(&seqiv_tmpl); + if (err) + goto out; + + err = crypto_register_template(&seqniv_tmpl); + if (err) + goto out_undo_niv; + +out: + return err; + +out_undo_niv: + crypto_unregister_template(&seqiv_tmpl); + goto out; } static void __exit seqiv_module_exit(void) @@ -375,3 +924,4 @@ module_exit(seqiv_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Sequence Number IV Generator"); MODULE_ALIAS_CRYPTO("seqiv"); +MODULE_ALIAS_CRYPTO("seqniv"); diff --git a/crypto/shash.c b/crypto/shash.c index 47c7139..ecb1e3d 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -520,11 +520,6 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm) return 0; } -static unsigned int crypto_shash_extsize(struct crypto_alg *alg) -{ - return alg->cra_ctxsize; -} - #ifdef CONFIG_NET static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) { @@ -564,7 +559,7 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) static const struct crypto_type crypto_shash_type = { .ctxsize = crypto_shash_ctxsize, - .extsize = crypto_shash_extsize, + .extsize = crypto_alg_extsize, .init = crypto_init_shash_ops, .init_tfm = crypto_shash_init_tfm, #ifdef CONFIG_PROC_FS diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 1a28001..2bff613 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -22,8 +22,10 @@ * */ +#include <crypto/aead.h> 
#include <crypto/hash.h> #include <linux/err.h> +#include <linux/fips.h> #include <linux/init.h> #include <linux/gfp.h> #include <linux/module.h> @@ -34,7 +36,6 @@ #include <linux/timex.h> #include <linux/interrupt.h> #include "tcrypt.h" -#include "internal.h" /* * Need slab memory for testing (size in number of pages). @@ -808,7 +809,7 @@ static int test_ahash_jiffies(struct ahash_request *req, int blen, for (start = jiffies, end = start + secs * HZ, bcount = 0; time_before(jiffies, end); bcount++) { - ret = crypto_ahash_init(req); + ret = do_one_ahash_op(req, crypto_ahash_init(req)); if (ret) return ret; for (pcount = 0; pcount < blen; pcount += plen) { @@ -877,7 +878,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen, /* Warm-up run. */ for (i = 0; i < 4; i++) { - ret = crypto_ahash_init(req); + ret = do_one_ahash_op(req, crypto_ahash_init(req)); if (ret) goto out; for (pcount = 0; pcount < blen; pcount += plen) { @@ -896,7 +897,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen, start = get_cycles(); - ret = crypto_ahash_init(req); + ret = do_one_ahash_op(req, crypto_ahash_init(req)); if (ret) goto out; for (pcount = 0; pcount < blen; pcount += plen) { diff --git a/crypto/testmgr.c b/crypto/testmgr.c index f9bce3d..277b3ac 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -20,8 +20,10 @@ * */ +#include <crypto/aead.h> #include <crypto/hash.h> #include <linux/err.h> +#include <linux/fips.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <linux/slab.h> @@ -2318,6 +2320,15 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "compress_null", .test = alg_test_null, }, { + .alg = "crc32", + .test = alg_test_hash, + .suite = { + .hash = { + .vecs = crc32_tv_template, + .count = CRC32_TEST_VECTORS + } + } + }, { .alg = "crc32c", .test = alg_test_crc32c, .fips_allowed = 1, @@ -3095,6 +3106,10 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "jitterentropy_rng", + 
.fips_allowed = 1, + .test = alg_test_null, + }, { .alg = "lrw(aes)", .test = alg_test_skcipher, .suite = { diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 62e2485..6003143 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -54,7 +54,7 @@ struct cipher_testvec { unsigned short tap[MAX_TAP]; int np; unsigned char also_non_np; - unsigned char fail; + bool fail; unsigned char wk; /* weak key flag */ unsigned char klen; unsigned short ilen; @@ -71,7 +71,7 @@ struct aead_testvec { unsigned char atap[MAX_TAP]; int np; int anp; - unsigned char fail; + bool fail; unsigned char novrfy; /* ccm dec verification failure expected */ unsigned char wk; /* weak key flag */ unsigned char klen; @@ -1822,7 +1822,7 @@ static struct hash_testvec tgr128_tv_template[] = { }, }; -#define GHASH_TEST_VECTORS 5 +#define GHASH_TEST_VECTORS 6 static struct hash_testvec ghash_tv_template[] = { @@ -1875,6 +1875,63 @@ static struct hash_testvec ghash_tv_template[] = .psize = 20, .digest = "\xf8\x94\x87\x2a\x4b\x63\x99\x28" "\x23\xf7\x93\xf7\x19\xf5\x96\xd9", + }, { + .key = "\x0a\x1b\x2c\x3d\x4e\x5f\x64\x71" + "\x82\x93\xa4\xb5\xc6\xd7\xe8\xf9", + .ksize = 16, + .plaintext = "\x56\x6f\x72\x20\x6c\x61\x75\x74" + "\x65\x72\x20\x4c\x61\x75\x73\x63" + "\x68\x65\x6e\x20\x75\x6e\x64\x20" + "\x53\x74\x61\x75\x6e\x65\x6e\x20" + "\x73\x65\x69\x20\x73\x74\x69\x6c" + "\x6c\x2c\x0a\x64\x75\x20\x6d\x65" + "\x69\x6e\x20\x74\x69\x65\x66\x74" + "\x69\x65\x66\x65\x73\x20\x4c\x65" + "\x62\x65\x6e\x3b\x0a\x64\x61\x73" + "\x73\x20\x64\x75\x20\x77\x65\x69" + "\xc3\x9f\x74\x20\x77\x61\x73\x20" + "\x64\x65\x72\x20\x57\x69\x6e\x64" + "\x20\x64\x69\x72\x20\x77\x69\x6c" + "\x6c\x2c\x0a\x65\x68\x20\x6e\x6f" + "\x63\x68\x20\x64\x69\x65\x20\x42" + "\x69\x72\x6b\x65\x6e\x20\x62\x65" + "\x62\x65\x6e\x2e\x0a\x0a\x55\x6e" + "\x64\x20\x77\x65\x6e\x6e\x20\x64" + "\x69\x72\x20\x65\x69\x6e\x6d\x61" + "\x6c\x20\x64\x61\x73\x20\x53\x63" + "\x68\x77\x65\x69\x67\x65\x6e\x20" + "\x73\x70\x72\x61\x63\x68\x2c\x0a" + 
"\x6c\x61\x73\x73\x20\x64\x65\x69" + "\x6e\x65\x20\x53\x69\x6e\x6e\x65" + "\x20\x62\x65\x73\x69\x65\x67\x65" + "\x6e\x2e\x0a\x4a\x65\x64\x65\x6d" + "\x20\x48\x61\x75\x63\x68\x65\x20" + "\x67\x69\x62\x74\x20\x64\x69\x63" + "\x68\x2c\x20\x67\x69\x62\x20\x6e" + "\x61\x63\x68\x2c\x0a\x65\x72\x20" + "\x77\x69\x72\x64\x20\x64\x69\x63" + "\x68\x20\x6c\x69\x65\x62\x65\x6e" + "\x20\x75\x6e\x64\x20\x77\x69\x65" + "\x67\x65\x6e\x2e\x0a\x0a\x55\x6e" + "\x64\x20\x64\x61\x6e\x6e\x20\x6d" + "\x65\x69\x6e\x65\x20\x53\x65\x65" + "\x6c\x65\x20\x73\x65\x69\x74\x20" + "\x77\x65\x69\x74\x2c\x20\x73\x65" + "\x69\x20\x77\x65\x69\x74\x2c\x0a" + "\x64\x61\x73\x73\x20\x64\x69\x72" + "\x20\x64\x61\x73\x20\x4c\x65\x62" + "\x65\x6e\x20\x67\x65\x6c\x69\x6e" + "\x67\x65\x2c\x0a\x62\x72\x65\x69" + "\x74\x65\x20\x64\x69\x63\x68\x20" + "\x77\x69\x65\x20\x65\x69\x6e\x20" + "\x46\x65\x69\x65\x72\x6b\x6c\x65" + "\x69\x64\x0a\xc3\xbc\x62\x65\x72" + "\x20\x64\x69\x65\x20\x73\x69\x6e" + "\x6e\x65\x6e\x64\x65\x6e\x20\x44" + "\x69\x6e\x67\x65\x2e\x2e\x2e\x0a", + .psize = 400, + .digest = "\xad\xb1\xc1\xe9\x56\x70\x31\x1d" + "\xbb\x5b\xdf\x5e\x70\x72\x1a\x57", }, }; @@ -3018,7 +3075,7 @@ static struct cipher_testvec des_enc_tv_template[] = { "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90", .rlen = 24, }, { /* Weak key */ - .fail = 1, + .fail = true, .wk = 1, .key = "\x01\x01\x01\x01\x01\x01\x01\x01", .klen = 8, @@ -28591,7 +28648,7 @@ struct comp_testvec { }; struct pcomp_testvec { - void *params; + const void *params; unsigned int paramsize; int inlen, outlen; char input[COMP_BUF_SIZE]; @@ -28946,6 +29003,440 @@ static struct hash_testvec michael_mic_tv_template[] = { }; /* + * CRC32 test vectors + */ +#define CRC32_TEST_VECTORS 14 + +static struct hash_testvec crc32_tv_template[] = { + { + .key = "\x87\xa9\xcb\xed", + .ksize = 4, + .psize = 0, + .digest = "\x87\xa9\xcb\xed", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + 
"\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26\x27\x28", + .psize = 40, + .digest = "\x3a\xdf\x4b\xb0", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30" + "\x31\x32\x33\x34\x35\x36\x37\x38" + "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40" + "\x41\x42\x43\x44\x45\x46\x47\x48" + "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50", + .psize = 40, + .digest = "\xa9\x7a\x7f\x7b", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58" + "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60" + "\x61\x62\x63\x64\x65\x66\x67\x68" + "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" + "\x71\x72\x73\x74\x75\x76\x77\x78", + .psize = 40, + .digest = "\xba\xd3\xf8\x1c", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80" + "\x81\x82\x83\x84\x85\x86\x87\x88" + "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90" + "\x91\x92\x93\x94\x95\x96\x97\x98" + "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0", + .psize = 40, + .digest = "\xa8\xa9\xc2\x02", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8" + "\xa9\xaa\xab\xac\xad\xae\xaf\xb0" + "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8" + "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0" + "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8", + .psize = 40, + .digest = "\x27\xf0\x57\xe2", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0" + "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8" + "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0" + "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8" + "\xe9\xea\xeb\xec\xed\xee\xef\xf0", + .psize = 40, + .digest = "\x49\x78\x10\x08", + }, + { + .key = "\x80\xea\xd3\xf1", + .ksize = 4, + .plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30" + "\x31\x32\x33\x34\x35\x36\x37\x38" + "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40" + "\x41\x42\x43\x44\x45\x46\x47\x48" + "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50", + .psize = 40, + .digest = "\x9a\xb1\xdc\xf0", + }, + { + .key = "\xf3\x4a\x1d\x5d", + .ksize = 4, + 
.plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58" + "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60" + "\x61\x62\x63\x64\x65\x66\x67\x68" + "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" + "\x71\x72\x73\x74\x75\x76\x77\x78", + .psize = 40, + .digest = "\xb4\x97\xcc\xd4", + }, + { + .key = "\x2e\x80\x04\x59", + .ksize = 4, + .plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80" + "\x81\x82\x83\x84\x85\x86\x87\x88" + "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90" + "\x91\x92\x93\x94\x95\x96\x97\x98" + "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0", + .psize = 40, + .digest = "\x67\x9b\xfa\x79", + }, + { + .key = "\xa6\xcc\x19\x85", + .ksize = 4, + .plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8" + "\xa9\xaa\xab\xac\xad\xae\xaf\xb0" + "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8" + "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0" + "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8", + .psize = 40, + .digest = "\x24\xb5\x16\xef", + }, + { + .key = "\x41\xfc\xfe\x2d", + .ksize = 4, + .plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0" + "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8" + "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0" + "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8" + "\xe9\xea\xeb\xec\xed\xee\xef\xf0", + .psize = 40, + .digest = "\x15\x94\x80\x39", + }, + { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08" + "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18" + "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" + "\x21\x22\x23\x24\x25\x26\x27\x28" + "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30" + "\x31\x32\x33\x34\x35\x36\x37\x38" + "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40" + "\x41\x42\x43\x44\x45\x46\x47\x48" + "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50" + "\x51\x52\x53\x54\x55\x56\x57\x58" + "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60" + "\x61\x62\x63\x64\x65\x66\x67\x68" + "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70" + "\x71\x72\x73\x74\x75\x76\x77\x78" + "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80" + "\x81\x82\x83\x84\x85\x86\x87\x88" + "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90" + "\x91\x92\x93\x94\x95\x96\x97\x98" + "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0" + "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8" + 
"\xa9\xaa\xab\xac\xad\xae\xaf\xb0" + "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8" + "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0" + "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8" + "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0" + "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8" + "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0" + "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8" + "\xe9\xea\xeb\xec\xed\xee\xef\xf0", + .psize = 240, + .digest = "\x6c\xc6\x56\xde", + .np = 2, + .tap = { 31, 209 } + }, { + .key = "\xff\xff\xff\xff", + .ksize = 4, + .plaintext = "\x6e\x05\x79\x10\xa7\x1b\xb2\x49" + "\xe0\x54\xeb\x82\x19\x8d\x24\xbb" + "\x2f\xc6\x5d\xf4\x68\xff\x96\x0a" + "\xa1\x38\xcf\x43\xda\x71\x08\x7c" + "\x13\xaa\x1e\xb5\x4c\xe3\x57\xee" + "\x85\x1c\x90\x27\xbe\x32\xc9\x60" + "\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2" + "\x46\xdd\x74\x0b\x7f\x16\xad\x21" + "\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93" + "\x2a\xc1\x35\xcc\x63\xfa\x6e\x05" + "\x9c\x10\xa7\x3e\xd5\x49\xe0\x77" + "\x0e\x82\x19\xb0\x24\xbb\x52\xe9" + "\x5d\xf4\x8b\x22\x96\x2d\xc4\x38" + "\xcf\x66\xfd\x71\x08\x9f\x13\xaa" + "\x41\xd8\x4c\xe3\x7a\x11\x85\x1c" + "\xb3\x27\xbe\x55\xec\x60\xf7\x8e" + "\x02\x99\x30\xc7\x3b\xd2\x69\x00" + "\x74\x0b\xa2\x16\xad\x44\xdb\x4f" + "\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1" + "\x58\xef\x63\xfa\x91\x05\x9c\x33" + "\xca\x3e\xd5\x6c\x03\x77\x0e\xa5" + "\x19\xb0\x47\xde\x52\xe9\x80\x17" + "\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66" + "\xfd\x94\x08\x9f\x36\xcd\x41\xd8" + "\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a" + "\xe1\x55\xec\x83\x1a\x8e\x25\xbc" + "\x30\xc7\x5e\xf5\x69\x00\x97\x0b" + "\xa2\x39\xd0\x44\xdb\x72\x09\x7d" + "\x14\xab\x1f\xb6\x4d\xe4\x58\xef" + "\x86\x1d\x91\x28\xbf\x33\xca\x61" + "\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3" + "\x47\xde\x75\x0c\x80\x17\xae\x22" + "\xb9\x50\xe7\x5b\xf2\x89\x20\x94" + "\x2b\xc2\x36\xcd\x64\xfb\x6f\x06" + "\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78" + "\x0f\x83\x1a\xb1\x25\xbc\x53\xea" + "\x5e\xf5\x8c\x00\x97\x2e\xc5\x39" + "\xd0\x67\xfe\x72\x09\xa0\x14\xab" + "\x42\xd9\x4d\xe4\x7b\x12\x86\x1d" + "\xb4\x28\xbf\x56\xed\x61\xf8\x8f" + "\x03\x9a\x31\xc8\x3c\xd3\x6a\x01" + 
"\x75\x0c\xa3\x17\xae\x45\xdc\x50" + "\xe7\x7e\x15\x89\x20\xb7\x2b\xc2" + "\x59\xf0\x64\xfb\x92\x06\x9d\x34" + "\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6" + "\x1a\xb1\x48\xdf\x53\xea\x81\x18" + "\x8c\x23\xba\x2e\xc5\x5c\xf3\x67" + "\xfe\x95\x09\xa0\x37\xce\x42\xd9" + "\x70\x07\x7b\x12\xa9\x1d\xb4\x4b" + "\xe2\x56\xed\x84\x1b\x8f\x26\xbd" + "\x31\xc8\x5f\xf6\x6a\x01\x98\x0c" + "\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e" + "\x15\xac\x20\xb7\x4e\xe5\x59\xf0" + "\x87\x1e\x92\x29\xc0\x34\xcb\x62" + "\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4" + "\x48\xdf\x76\x0d\x81\x18\xaf\x23" + "\xba\x51\xe8\x5c\xf3\x8a\x21\x95" + "\x2c\xc3\x37\xce\x65\xfc\x70\x07" + "\x9e\x12\xa9\x40\xd7\x4b\xe2\x79" + "\x10\x84\x1b\xb2\x26\xbd\x54\xeb" + "\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a" + "\xd1\x68\xff\x73\x0a\xa1\x15\xac" + "\x43\xda\x4e\xe5\x7c\x13\x87\x1e" + "\xb5\x29\xc0\x57\xee\x62\xf9\x90" + "\x04\x9b\x32\xc9\x3d\xd4\x6b\x02" + "\x76\x0d\xa4\x18\xaf\x46\xdd\x51" + "\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3" + "\x5a\xf1\x65\xfc\x93\x07\x9e\x35" + "\xcc\x40\xd7\x6e\x05\x79\x10\xa7" + "\x1b\xb2\x49\xe0\x54\xeb\x82\x19" + "\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68" + "\xff\x96\x0a\xa1\x38\xcf\x43\xda" + "\x71\x08\x7c\x13\xaa\x1e\xb5\x4c" + "\xe3\x57\xee\x85\x1c\x90\x27\xbe" + "\x32\xc9\x60\xf7\x6b\x02\x99\x0d" + "\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f" + "\x16\xad\x21\xb8\x4f\xe6\x5a\xf1" + "\x88\x1f\x93\x2a\xc1\x35\xcc\x63" + "\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5" + "\x49\xe0\x77\x0e\x82\x19\xb0\x24" + "\xbb\x52\xe9\x5d\xf4\x8b\x22\x96" + "\x2d\xc4\x38\xcf\x66\xfd\x71\x08" + "\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a" + "\x11\x85\x1c\xb3\x27\xbe\x55\xec" + "\x60\xf7\x8e\x02\x99\x30\xc7\x3b" + "\xd2\x69\x00\x74\x0b\xa2\x16\xad" + "\x44\xdb\x4f\xe6\x7d\x14\x88\x1f" + "\xb6\x2a\xc1\x58\xef\x63\xfa\x91" + "\x05\x9c\x33\xca\x3e\xd5\x6c\x03" + "\x77\x0e\xa5\x19\xb0\x47\xde\x52" + "\xe9\x80\x17\x8b\x22\xb9\x2d\xc4" + "\x5b\xf2\x66\xfd\x94\x08\x9f\x36" + "\xcd\x41\xd8\x6f\x06\x7a\x11\xa8" + "\x1c\xb3\x4a\xe1\x55\xec\x83\x1a" + "\x8e\x25\xbc\x30\xc7\x5e\xf5\x69" + 
"\x00\x97\x0b\xa2\x39\xd0\x44\xdb" + "\x72\x09\x7d\x14\xab\x1f\xb6\x4d" + "\xe4\x58\xef\x86\x1d\x91\x28\xbf" + "\x33\xca\x61\xf8\x6c\x03\x9a\x0e" + "\xa5\x3c\xd3\x47\xde\x75\x0c\x80" + "\x17\xae\x22\xb9\x50\xe7\x5b\xf2" + "\x89\x20\x94\x2b\xc2\x36\xcd\x64" + "\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6" + "\x4a\xe1\x78\x0f\x83\x1a\xb1\x25" + "\xbc\x53\xea\x5e\xf5\x8c\x00\x97" + "\x2e\xc5\x39\xd0\x67\xfe\x72\x09" + "\xa0\x14\xab\x42\xd9\x4d\xe4\x7b" + "\x12\x86\x1d\xb4\x28\xbf\x56\xed" + "\x61\xf8\x8f\x03\x9a\x31\xc8\x3c" + "\xd3\x6a\x01\x75\x0c\xa3\x17\xae" + "\x45\xdc\x50\xe7\x7e\x15\x89\x20" + "\xb7\x2b\xc2\x59\xf0\x64\xfb\x92" + "\x06\x9d\x34\xcb\x3f\xd6\x6d\x04" + "\x78\x0f\xa6\x1a\xb1\x48\xdf\x53" + "\xea\x81\x18\x8c\x23\xba\x2e\xc5" + "\x5c\xf3\x67\xfe\x95\x09\xa0\x37" + "\xce\x42\xd9\x70\x07\x7b\x12\xa9" + "\x1d\xb4\x4b\xe2\x56\xed\x84\x1b" + "\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a" + "\x01\x98\x0c\xa3\x3a\xd1\x45\xdc" + "\x73\x0a\x7e\x15\xac\x20\xb7\x4e" + "\xe5\x59\xf0\x87\x1e\x92\x29\xc0" + "\x34\xcb\x62\xf9\x6d\x04\x9b\x0f" + "\xa6\x3d\xd4\x48\xdf\x76\x0d\x81" + "\x18\xaf\x23\xba\x51\xe8\x5c\xf3" + "\x8a\x21\x95\x2c\xc3\x37\xce\x65" + "\xfc\x70\x07\x9e\x12\xa9\x40\xd7" + "\x4b\xe2\x79\x10\x84\x1b\xb2\x26" + "\xbd\x54\xeb\x5f\xf6\x8d\x01\x98" + "\x2f\xc6\x3a\xd1\x68\xff\x73\x0a" + "\xa1\x15\xac\x43\xda\x4e\xe5\x7c" + "\x13\x87\x1e\xb5\x29\xc0\x57\xee" + "\x62\xf9\x90\x04\x9b\x32\xc9\x3d" + "\xd4\x6b\x02\x76\x0d\xa4\x18\xaf" + "\x46\xdd\x51\xe8\x7f\x16\x8a\x21" + "\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93" + "\x07\x9e\x35\xcc\x40\xd7\x6e\x05" + "\x79\x10\xa7\x1b\xb2\x49\xe0\x54" + "\xeb\x82\x19\x8d\x24\xbb\x2f\xc6" + "\x5d\xf4\x68\xff\x96\x0a\xa1\x38" + "\xcf\x43\xda\x71\x08\x7c\x13\xaa" + "\x1e\xb5\x4c\xe3\x57\xee\x85\x1c" + "\x90\x27\xbe\x32\xc9\x60\xf7\x6b" + "\x02\x99\x0d\xa4\x3b\xd2\x46\xdd" + "\x74\x0b\x7f\x16\xad\x21\xb8\x4f" + "\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1" + "\x35\xcc\x63\xfa\x6e\x05\x9c\x10" + "\xa7\x3e\xd5\x49\xe0\x77\x0e\x82" + "\x19\xb0\x24\xbb\x52\xe9\x5d\xf4" + 
"\x8b\x22\x96\x2d\xc4\x38\xcf\x66" + "\xfd\x71\x08\x9f\x13\xaa\x41\xd8" + "\x4c\xe3\x7a\x11\x85\x1c\xb3\x27" + "\xbe\x55\xec\x60\xf7\x8e\x02\x99" + "\x30\xc7\x3b\xd2\x69\x00\x74\x0b" + "\xa2\x16\xad\x44\xdb\x4f\xe6\x7d" + "\x14\x88\x1f\xb6\x2a\xc1\x58\xef" + "\x63\xfa\x91\x05\x9c\x33\xca\x3e" + "\xd5\x6c\x03\x77\x0e\xa5\x19\xb0" + "\x47\xde\x52\xe9\x80\x17\x8b\x22" + "\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94" + "\x08\x9f\x36\xcd\x41\xd8\x6f\x06" + "\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55" + "\xec\x83\x1a\x8e\x25\xbc\x30\xc7" + "\x5e\xf5\x69\x00\x97\x0b\xa2\x39" + "\xd0\x44\xdb\x72\x09\x7d\x14\xab" + "\x1f\xb6\x4d\xe4\x58\xef\x86\x1d" + "\x91\x28\xbf\x33\xca\x61\xf8\x6c" + "\x03\x9a\x0e\xa5\x3c\xd3\x47\xde" + "\x75\x0c\x80\x17\xae\x22\xb9\x50" + "\xe7\x5b\xf2\x89\x20\x94\x2b\xc2" + "\x36\xcd\x64\xfb\x6f\x06\x9d\x11" + "\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83" + "\x1a\xb1\x25\xbc\x53\xea\x5e\xf5" + "\x8c\x00\x97\x2e\xc5\x39\xd0\x67" + "\xfe\x72\x09\xa0\x14\xab\x42\xd9" + "\x4d\xe4\x7b\x12\x86\x1d\xb4\x28" + "\xbf\x56\xed\x61\xf8\x8f\x03\x9a" + "\x31\xc8\x3c\xd3\x6a\x01\x75\x0c" + "\xa3\x17\xae\x45\xdc\x50\xe7\x7e" + "\x15\x89\x20\xb7\x2b\xc2\x59\xf0" + "\x64\xfb\x92\x06\x9d\x34\xcb\x3f" + "\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1" + "\x48\xdf\x53\xea\x81\x18\x8c\x23" + "\xba\x2e\xc5\x5c\xf3\x67\xfe\x95" + "\x09\xa0\x37\xce\x42\xd9\x70\x07" + "\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56" + "\xed\x84\x1b\x8f\x26\xbd\x31\xc8" + "\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a" + "\xd1\x45\xdc\x73\x0a\x7e\x15\xac" + "\x20\xb7\x4e\xe5\x59\xf0\x87\x1e" + "\x92\x29\xc0\x34\xcb\x62\xf9\x6d" + "\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf" + "\x76\x0d\x81\x18\xaf\x23\xba\x51" + "\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3" + "\x37\xce\x65\xfc\x70\x07\x9e\x12" + "\xa9\x40\xd7\x4b\xe2\x79\x10\x84" + "\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6" + "\x8d\x01\x98\x2f\xc6\x3a\xd1\x68" + "\xff\x73\x0a\xa1\x15\xac\x43\xda" + "\x4e\xe5\x7c\x13\x87\x1e\xb5\x29" + "\xc0\x57\xee\x62\xf9\x90\x04\x9b" + "\x32\xc9\x3d\xd4\x6b\x02\x76\x0d" + "\xa4\x18\xaf\x46\xdd\x51\xe8\x7f" + 
"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1" + "\x65\xfc\x93\x07\x9e\x35\xcc\x40" + "\xd7\x6e\x05\x79\x10\xa7\x1b\xb2" + "\x49\xe0\x54\xeb\x82\x19\x8d\x24" + "\xbb\x2f\xc6\x5d\xf4\x68\xff\x96" + "\x0a\xa1\x38\xcf\x43\xda\x71\x08" + "\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57" + "\xee\x85\x1c\x90\x27\xbe\x32\xc9" + "\x60\xf7\x6b\x02\x99\x0d\xa4\x3b" + "\xd2\x46\xdd\x74\x0b\x7f\x16\xad" + "\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f" + "\x93\x2a\xc1\x35\xcc\x63\xfa\x6e" + "\x05\x9c\x10\xa7\x3e\xd5\x49\xe0" + "\x77\x0e\x82\x19\xb0\x24\xbb\x52" + "\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4" + "\x38\xcf\x66\xfd\x71\x08\x9f\x13" + "\xaa\x41\xd8\x4c\xe3\x7a\x11\x85" + "\x1c\xb3\x27\xbe\x55\xec\x60\xf7" + "\x8e\x02\x99\x30\xc7\x3b\xd2\x69" + "\x00\x74\x0b\xa2\x16\xad\x44\xdb" + "\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a" + "\xc1\x58\xef\x63\xfa\x91\x05\x9c" + "\x33\xca\x3e\xd5\x6c\x03\x77\x0e" + "\xa5\x19\xb0\x47\xde\x52\xe9\x80" + "\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2" + "\x66\xfd\x94\x08\x9f\x36\xcd\x41" + "\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3" + "\x4a\xe1\x55\xec\x83\x1a\x8e\x25" + "\xbc\x30\xc7\x5e\xf5\x69\x00\x97" + "\x0b\xa2\x39\xd0\x44\xdb\x72\x09" + "\x7d\x14\xab\x1f\xb6\x4d\xe4\x58" + "\xef\x86\x1d\x91\x28\xbf\x33\xca" + "\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c" + "\xd3\x47\xde\x75\x0c\x80\x17\xae" + "\x22\xb9\x50\xe7\x5b\xf2\x89\x20" + "\x94\x2b\xc2\x36\xcd\x64\xfb\x6f" + "\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1" + "\x78\x0f\x83\x1a\xb1\x25\xbc\x53" + "\xea\x5e\xf5\x8c\x00\x97\x2e\xc5" + "\x39\xd0\x67\xfe\x72\x09\xa0\x14" + "\xab\x42\xd9\x4d\xe4\x7b\x12\x86" + "\x1d\xb4\x28\xbf\x56\xed\x61\xf8" + "\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a" + "\x01\x75\x0c\xa3\x17\xae\x45\xdc" + "\x50\xe7\x7e\x15\x89\x20\xb7\x2b" + "\xc2\x59\xf0\x64\xfb\x92\x06\x9d" + "\x34\xcb\x3f\xd6\x6d\x04\x78\x0f" + "\xa6\x1a\xb1\x48\xdf\x53\xea\x81" + "\x18\x8c\x23\xba\x2e\xc5\x5c\xf3" + "\x67\xfe\x95\x09\xa0\x37\xce\x42" + "\xd9\x70\x07\x7b\x12\xa9\x1d\xb4" + "\x4b\xe2\x56\xed\x84\x1b\x8f\x26" + "\xbd\x31\xc8\x5f\xf6\x6a\x01\x98", + .psize = 2048, + .digest = 
"\xfb\x3a\x7a\xda", + } +}; + +/* * CRC32C test vectors */ #define CRC32C_TEST_VECTORS 15 diff --git a/crypto/zlib.c b/crypto/zlib.c index 0eefa9d..d51a30a 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -78,7 +78,7 @@ static void zlib_exit(struct crypto_tfm *tfm) } -static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params, +static int zlib_compress_setup(struct crypto_pcomp *tfm, const void *params, unsigned int len) { struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); @@ -209,7 +209,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm, } -static int zlib_decompress_setup(struct crypto_pcomp *tfm, void *params, +static int zlib_decompress_setup(struct crypto_pcomp *tfm, const void *params, unsigned int len) { struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm)); diff --git a/drivers/char/random.c b/drivers/char/random.c index 9cd6968..159d070 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -660,7 +660,7 @@ retry: r->entropy_total = 0; if (r == &nonblocking_pool) { prandom_reseed_late(); - wake_up_interruptible(&urandom_init_wait); + wake_up_all(&urandom_init_wait); pr_notice("random: %s pool is initialized\n", r->name); } } @@ -1245,6 +1245,18 @@ void get_random_bytes(void *buf, int nbytes) EXPORT_SYMBOL(get_random_bytes); /* + * Equivalent function to get_random_bytes with the difference that this + * function blocks the request until the nonblocking_pool is initialized. + */ +void get_blocking_random_bytes(void *buf, int nbytes) +{ + if (unlikely(nonblocking_pool.initialized == 0)) + wait_event(urandom_init_wait, nonblocking_pool.initialized); + extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0); +} +EXPORT_SYMBOL(get_blocking_random_bytes); + +/* * This function will use the architecture-specific hardware random * number generator if it is available. 
The arch-specific hw RNG will * almost certainly be faster than what we can do in software, but it diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 033c0c8..6b0579f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS To compile this driver as a module, choose M here: the module will be called talitos. +config CRYPTO_DEV_TALITOS1 + bool "SEC1 (SEC 1.0 and SEC Lite 1.2)" + depends on CRYPTO_DEV_TALITOS + depends on PPC_8xx || PPC_82xx + default y + help + Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0 + found on MPC82xx or the Freescale Security Engine (SEC Lite) + version 1.2 found on MPC8xx + +config CRYPTO_DEV_TALITOS2 + bool "SEC2+ (SEC version 2.0 or upper)" + depends on CRYPTO_DEV_TALITOS + default y if !PPC_8xx + help + Say 'Y' here to use the Freescale Security Engine (SEC) + version 2 and following as found on MPC83xx, MPC85xx, etc ... + config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE @@ -312,11 +330,13 @@ config CRYPTO_DEV_S5P algorithms execution. config CRYPTO_DEV_NX - bool "Support for IBM Power7+ in-Nest cryptographic acceleration" - depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN - default n + bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration" + depends on PPC64 help - Support for Power7+ in-Nest cryptographic acceleration. + This enables support for the NX hardware cryptographic accelerator + coprocessor that is in IBM PowerPC P7+ or later processors. This + does not actually enable any drivers, it only allows you to select + which acceleration type (encryption and/or compression) to enable. 
if CRYPTO_DEV_NX source "drivers/crypto/nx/Kconfig" diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 29071a1..3d850ab 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -258,7 +258,7 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, static int aead_null_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -383,7 +383,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); @@ -449,7 +449,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead) static int aead_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct crypto_tfm *ctfm = crypto_aead_tfm(aead); const char *alg_name = crypto_tfm_alg_name(ctfm); @@ -510,7 +510,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); @@ -518,7 +518,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* read assoc before reading payload */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); 
- aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off); + aead_append_ld_iv(desc, ivsize, ctx1_iv_off); /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) @@ -577,7 +577,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); @@ -586,7 +586,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | KEY_VLF); - aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off); + aead_append_ld_iv(desc, ivsize, ctx1_iv_off); /* Load Counter into CONTEXT1 reg */ if (is_rfc3686) @@ -645,20 +645,20 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Generate IV */ geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); append_move(desc, MOVE_WAITCOMP | MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX | (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); /* Copy IV to class 1 context */ append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO | (ctx1_iv_off << MOVE_OFFSET_SHIFT) | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Return to encryption */ append_operation(desc, ctx->class2_alg_type | @@ -676,10 +676,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Copy iv from outfifo to class 2 fifo */ moveiv = NFIFOENTRY_STYPE_OFIFO | 
NFIFOENTRY_DEST_CLASS2 | - NFIFOENTRY_DTYPE_MSG | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); - append_load_imm_u32(desc, tfm->ivsize, LDST_CLASS_2_CCB | + append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB | LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM); /* Load Counter into CONTEXT1 reg */ @@ -698,7 +698,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ); /* Not need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP); /* Will read cryptlen */ @@ -738,7 +738,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -781,7 +781,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG1, REG2, REG3, CAAM_CMD_SZ); @@ -791,7 +791,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_MATH_Z); /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); /* if assoclen is ZERO, skip reading the assoc data */ @@ -824,7 +824,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL | 
JUMP_COND_MATH_Z); /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); /* read assoc data */ @@ -836,7 +836,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) /* read IV - is the only input data */ set_jump_tgt_here(desc, zero_assoc_jump_cmd2); - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | FIFOLD_TYPE_LAST1); @@ -888,14 +888,14 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, REG1, REG3, REG2, CAAM_CMD_SZ); /* read IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); /* jump to zero-payload command if cryptlen is zero */ @@ -968,7 +968,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -1012,7 +1012,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); /* assoclen + cryptlen = seqinlen - ivsize */ - append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); + append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); @@ -1021,7 
+1021,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); /* Read AES-GCM-ESP IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); /* Read assoc data */ @@ -1085,7 +1085,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - icvsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize); + ctx->authsize + ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); @@ -1098,7 +1098,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen), 4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV); /* Read AES-GCM-ESP IV */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 | + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1); /* Read assoc data */ @@ -1161,17 +1161,17 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) /* Generate IV */ geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); move_cmd = append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); /* Copy generated IV to OFIFO */ write_iv_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_OUTFIFO | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Class 1 operation */ 
append_operation(desc, ctx->class1_alg_type | @@ -1199,7 +1199,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) /* End of blank commands */ /* No need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP); + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP); /* Read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | @@ -1249,7 +1249,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct aead_tfm *tfm = &aead->base.crt_aead; + unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; @@ -1291,7 +1291,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Load AES-GMAC ESP IV into Math1 register */ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | - LDST_CLASS_DECO | tfm->ivsize); + LDST_CLASS_DECO | ivsize); /* Wait the DMA transaction to finish */ append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | @@ -1299,11 +1299,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Overwrite blank immediate AES-GMAC ESP IV data */ write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Overwrite blank immediate AAD data */ write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* cryptlen = seqoutlen - authsize */ append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); @@ -1313,7 +1313,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Read Salt and AES-GMAC ESP IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); + FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize)); /* Append Salt */ append_data(desc, (void *)(ctx->key + 
ctx->enckeylen), 4); set_move_tgt_here(desc, write_iv_cmd); @@ -1344,7 +1344,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Authenticate AES-GMAC ESP IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); + FIFOLD_TYPE_AAD | ivsize); set_move_tgt_here(desc, write_aad_cmd); /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ append_cmd(desc, 0x00000000); @@ -1407,7 +1407,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Load AES-GMAC ESP IV into Math1 register */ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 | - LDST_CLASS_DECO | tfm->ivsize); + LDST_CLASS_DECO | ivsize); /* Wait the DMA transaction to finish */ append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM | @@ -1418,11 +1418,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Overwrite blank immediate AES-GMAC ESP IV data */ write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Overwrite blank immediate AAD data */ write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); @@ -1440,7 +1440,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Read Salt and AES-GMAC ESP IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); + FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize)); /* Append Salt */ append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); set_move_tgt_here(desc, write_iv_cmd); @@ -1461,7 +1461,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Authenticate AES-GMAC ESP IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); + FIFOLD_TYPE_AAD | ivsize); 
set_move_tgt_here(desc, write_aad_cmd); /* Blank commands. Will be overwritten by AES-GMAC ESP IV. */ append_cmd(desc, 0x00000000); @@ -1527,26 +1527,26 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Generate IV */ geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO | NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 | - NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT); + NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT); append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB | LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM); append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); /* Move generated IV to Math1 register */ append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_MATH1 | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); /* Overwrite blank immediate AES-GMAC IV data */ write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Overwrite blank immediate AAD data */ write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Copy generated IV to OFIFO */ append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_OUTFIFO | - (tfm->ivsize << MOVE_LEN_SHIFT)); + (ivsize << MOVE_LEN_SHIFT)); /* Class 1 operation */ append_operation(desc, ctx->class1_alg_type | @@ -1573,7 +1573,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Read Salt and AES-GMAC generated IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize)); + FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize)); /* Append Salt */ append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4); set_move_tgt_here(desc, write_iv_cmd); @@ -1583,7 +1583,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* End of blank commands */ /* No need to reload iv */ - append_seq_fifo_load(desc, tfm->ivsize, 
FIFOLD_CLASS_SKIP); + append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP); /* Read assoc data */ append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF | @@ -1594,7 +1594,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) /* Authenticate AES-GMAC IV */ append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE | - FIFOLD_TYPE_AAD | tfm->ivsize); + FIFOLD_TYPE_AAD | ivsize); set_move_tgt_here(desc, write_aad_cmd); /* Blank commands. Will be overwritten by AES-GMAC IV. */ append_cmd(desc, 0x00000000); @@ -3379,11 +3379,7 @@ struct caam_alg_template { u32 type; union { struct ablkcipher_alg ablkcipher; - struct aead_alg aead; - struct blkcipher_alg blkcipher; - struct cipher_alg cipher; - struct compress_alg compress; - struct rng_alg rng; + struct old_aead_alg aead; } template_u; u32 class1_alg_type; u32 class2_alg_type; diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index acd7743..f57f395 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -32,7 +32,7 @@ #include <crypto/des.h> #include <crypto/sha.h> #include <crypto/md5.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/scatterwalk.h> #include <crypto/internal/skcipher.h> diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 71f2e3c..542453c 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -53,7 +53,6 @@ struct ccp_dm_workarea { struct ccp_sg_workarea { struct scatterlist *sg; unsigned int nents; - unsigned int length; struct scatterlist *dma_sg; struct device *dma_dev; @@ -497,7 +496,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, return 0; wa->nents = sg_nents(sg); - wa->length = sg->length; wa->bytes_left = len; wa->sg_used = 0; diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c index b1c20b2..c0aa5c5 100644 --- a/drivers/crypto/ccp/ccp-platform.c 
+++ b/drivers/crypto/ccp/ccp-platform.c @@ -174,8 +174,6 @@ static int ccp_platform_probe(struct platform_device *pdev) } ccp->io_regs = ccp->io_map; - if (!dev->dma_mask) - dev->dma_mask = &dev->coherent_dma_mask; ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); if (ret) { dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 48f4535..7ba495f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -25,7 +25,7 @@ #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/algapi.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/scatterwalk.h> @@ -575,7 +575,8 @@ static int init_tfm_ablk(struct crypto_tfm *tfm) static int init_tfm_aead(struct crypto_tfm *tfm) { - tfm->crt_aead.reqsize = sizeof(struct aead_ctx); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_ctx)); return init_tfm(tfm); } @@ -1096,7 +1097,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize) { struct ixp_ctx *ctx = crypto_aead_ctx(tfm); u32 *flags = &tfm->base.crt_flags; - unsigned digest_len = crypto_aead_alg(tfm)->maxauthsize; + unsigned digest_len = crypto_aead_maxauthsize(tfm); int ret; if (!ctx->enckey_len && !ctx->authkey_len) @@ -1138,7 +1139,7 @@ out: static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - int max = crypto_aead_alg(tfm)->maxauthsize >> 2; + int max = crypto_aead_maxauthsize(tfm) >> 2; if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3)) return -EINVAL; diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index f91f15d..eb645c2 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -595,7 +595,7 @@ static int queue_manag(void *data) cpg->eng_st = ENGINE_IDLE; do { struct crypto_async_request *async_req = NULL; - struct crypto_async_request *backlog; + struct crypto_async_request 
*backlog = NULL; __set_current_state(TASK_INTERRUPTIBLE); @@ -1041,23 +1041,23 @@ static int mv_probe(struct platform_device *pdev) spin_lock_init(&cp->lock); crypto_init_queue(&cp->queue, 50); - cp->reg = ioremap(res->start, resource_size(res)); - if (!cp->reg) { - ret = -ENOMEM; + cp->reg = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cp->reg)) { + ret = PTR_ERR(cp->reg); goto err; } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram"); if (!res) { ret = -ENXIO; - goto err_unmap_reg; + goto err; } cp->sram_size = resource_size(res); cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE; cp->sram = ioremap(res->start, cp->sram_size); if (!cp->sram) { ret = -ENOMEM; - goto err_unmap_reg; + goto err; } if (pdev->dev.of_node) @@ -1136,8 +1136,6 @@ err_thread: kthread_stop(cp->queue_th); err_unmap_sram: iounmap(cp->sram); -err_unmap_reg: - iounmap(cp->reg); err: kfree(cp); cpg = NULL; @@ -1158,7 +1156,6 @@ static int mv_remove(struct platform_device *pdev) free_irq(cp->irq, cp); memset(cp->sram, 0, cp->sram_size); iounmap(cp->sram); - iounmap(cp->reg); if (!IS_ERR(cp->clk)) { clk_disable_unprepare(cp->clk); diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index afd136b..722392f 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1281,10 +1281,10 @@ static const char md5_zero[MD5_DIGEST_SIZE] = { 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, }; static const u32 md5_init[MD5_HASH_WORDS] = { - cpu_to_le32(0x67452301), - cpu_to_le32(0xefcdab89), - cpu_to_le32(0x98badcfe), - cpu_to_le32(0x10325476), + cpu_to_le32(MD5_H0), + cpu_to_le32(MD5_H1), + cpu_to_le32(MD5_H2), + cpu_to_le32(MD5_H3), }; static const char sha1_zero[SHA1_DIGEST_SIZE] = { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig index f826166..3e621ad 100644 --- a/drivers/crypto/nx/Kconfig +++ b/drivers/crypto/nx/Kconfig @@ -1,7 +1,9 @@ + config CRYPTO_DEV_NX_ENCRYPT - tristate 
"Encryption acceleration support" - depends on PPC64 && IBMVIO + tristate "Encryption acceleration support on pSeries platform" + depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN default y + select CRYPTO_ALGAPI select CRYPTO_AES select CRYPTO_CBC select CRYPTO_ECB @@ -12,15 +14,50 @@ config CRYPTO_DEV_NX_ENCRYPT select CRYPTO_SHA256 select CRYPTO_SHA512 help - Support for Power7+ in-Nest encryption acceleration. This - module supports acceleration for AES and SHA2 algorithms. If you - choose 'M' here, this module will be called nx_crypto. + Support for PowerPC Nest (NX) encryption acceleration. This + module supports acceleration for AES and SHA2 algorithms on + the pSeries platform. If you choose 'M' here, this module + will be called nx_crypto. config CRYPTO_DEV_NX_COMPRESS tristate "Compression acceleration support" - depends on PPC64 && IBMVIO default y help - Support for Power7+ in-Nest compression acceleration. This - module supports acceleration for AES and SHA2 algorithms. If you - choose 'M' here, this module will be called nx_compress. + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. One of the platform drivers must be selected also. + If you choose 'M' here, this module will be called nx_compress. + +if CRYPTO_DEV_NX_COMPRESS + +config CRYPTO_DEV_NX_COMPRESS_PSERIES + tristate "Compression acceleration support on pSeries platform" + depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN + default y + help + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. This supports NX hardware on the pSeries platform. + If you choose 'M' here, this module will be called nx_compress_pseries. 
+ +config CRYPTO_DEV_NX_COMPRESS_POWERNV + tristate "Compression acceleration support on PowerNV platform" + depends on PPC_POWERNV + default y + help + Support for PowerPC Nest (NX) compression acceleration. This + module supports acceleration for compressing memory with the 842 + algorithm. This supports NX hardware on the PowerNV platform. + If you choose 'M' here, this module will be called nx_compress_powernv. + +config CRYPTO_DEV_NX_COMPRESS_CRYPTO + tristate "Compression acceleration cryptographic interface" + select CRYPTO_ALGAPI + select 842_DECOMPRESS + default y + help + Support for PowerPC Nest (NX) accelerators using the cryptographic + API. If you choose 'M' here, this module will be called + nx_compress_crypto. + +endif diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile index bb770ea..868b5e6 100644 --- a/drivers/crypto/nx/Makefile +++ b/drivers/crypto/nx/Makefile @@ -11,4 +11,10 @@ nx-crypto-objs := nx.o \ nx-sha512.o obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o +obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o nx-compress-objs := nx-842.o +nx-compress-pseries-objs := nx-842-pseries.o +nx-compress-powernv-objs := nx-842-powernv.o +nx-compress-crypto-objs := nx-842-crypto.o diff --git a/drivers/crypto/nx/nx-842-crypto.c b/drivers/crypto/nx/nx-842-crypto.c new file mode 100644 index 0000000..2ffa103 --- /dev/null +++ b/drivers/crypto/nx/nx-842-crypto.c @@ -0,0 +1,579 @@ +/* + * Cryptographic API for the NX-842 hardware compression. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) IBM Corporation, 2011-2015 + * + * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> + * + * Rewrite: Dan Streetman <ddstreet@ieee.org> + * + * This is an interface to the NX-842 compression hardware in PowerPC + * processors. Most of the complexity of this driver is due to the fact that + * the NX-842 compression hardware requires the input and output data buffers + * to be specifically aligned, to be a specific multiple in length, and within + * specific minimum and maximum lengths. Those restrictions, provided by the + * nx-842 driver via nx842_constraints, mean this driver must use bounce + * buffers and headers to correct misaligned in or out buffers, and to split + * input buffers that are too large. + * + * This driver will fall back to software decompression if the hardware + * decompression fails, so this driver's decompression should never fail as + * long as the provided compressed buffer is valid. Any compressed buffer + * created by this driver will have a header (except ones where the input + * perfectly matches the constraints); so users of this driver cannot simply + * pass a compressed buffer created by this driver over to the 842 software + * decompression library. Instead, users must use this driver to decompress; + * if the hardware fails or is unavailable, the compressed buffer will be + * parsed and the header removed, and the raw 842 buffer(s) passed to the 842 + * software decompression library. 
+ * + * This does not fall back to software compression, however, since the caller + * of this function is specifically requesting hardware compression; if the + * hardware compression fails, the caller can fall back to software + * compression, and the raw 842 compressed buffer that the software compressor + * creates can be passed to this driver for hardware decompression; any + * buffer without our specific header magic is assumed to be a raw 842 buffer + * and passed directly to the hardware. Note that the software compression + * library will produce a compressed buffer that is incompatible with the + * hardware decompressor if the original input buffer length is not a multiple + * of 8; if such a compressed buffer is passed to this driver for + * decompression, the hardware will reject it and this driver will then pass + * it over to the software library for decompression. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/crypto.h> +#include <linux/vmalloc.h> +#include <linux/nx842.h> +#include <linux/sw842.h> +#include <linux/ratelimit.h> + +/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit + * template (see lib/842/842.h), so this magic number will never appear at + * the start of a raw 842 compressed buffer. That is important, as any buffer + * passed to us without this magic is assumed to be a raw 842 compressed + * buffer, and passed directly to the hardware to decompress. 
+ */ +#define NX842_CRYPTO_MAGIC (0xf842) +#define NX842_CRYPTO_GROUP_MAX (0x20) +#define NX842_CRYPTO_HEADER_SIZE(g) \ + (sizeof(struct nx842_crypto_header) + \ + sizeof(struct nx842_crypto_header_group) * (g)) +#define NX842_CRYPTO_HEADER_MAX_SIZE \ + NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX) + +/* bounce buffer size */ +#define BOUNCE_BUFFER_ORDER (2) +#define BOUNCE_BUFFER_SIZE \ + ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER)) + +/* try longer on comp because we can fallback to sw decomp if hw is busy */ +#define COMP_BUSY_TIMEOUT (250) /* ms */ +#define DECOMP_BUSY_TIMEOUT (50) /* ms */ + +struct nx842_crypto_header_group { + __be16 padding; /* unused bytes at start of group */ + __be32 compressed_length; /* compressed bytes in group */ + __be32 uncompressed_length; /* bytes after decompression */ +} __packed; + +struct nx842_crypto_header { + __be16 magic; /* NX842_CRYPTO_MAGIC */ + __be16 ignore; /* decompressed end bytes to ignore */ + u8 groups; /* total groups in this header */ + struct nx842_crypto_header_group group[]; +} __packed; + +struct nx842_crypto_param { + u8 *in; + unsigned int iremain; + u8 *out; + unsigned int oremain; + unsigned int ototal; +}; + +static int update_param(struct nx842_crypto_param *p, + unsigned int slen, unsigned int dlen) +{ + if (p->iremain < slen) + return -EOVERFLOW; + if (p->oremain < dlen) + return -ENOSPC; + + p->in += slen; + p->iremain -= slen; + p->out += dlen; + p->oremain -= dlen; + p->ototal += dlen; + + return 0; +} + +struct nx842_crypto_ctx { + u8 *wmem; + u8 *sbounce, *dbounce; + + struct nx842_crypto_header header; + struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; +}; + +static int nx842_crypto_init(struct crypto_tfm *tfm) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->wmem = kmalloc(NX842_MEM_COMPRESS, GFP_KERNEL); + ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); + ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER); 
+ if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) { + kfree(ctx->wmem); + free_page((unsigned long)ctx->sbounce); + free_page((unsigned long)ctx->dbounce); + return -ENOMEM; + } + + return 0; +} + +static void nx842_crypto_exit(struct crypto_tfm *tfm) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + + kfree(ctx->wmem); + free_page((unsigned long)ctx->sbounce); + free_page((unsigned long)ctx->dbounce); +} + +static int read_constraints(struct nx842_constraints *c) +{ + int ret; + + ret = nx842_constraints(c); + if (ret) { + pr_err_ratelimited("could not get nx842 constraints : %d\n", + ret); + return ret; + } + + /* limit maximum, to always have enough bounce buffer to decompress */ + if (c->maximum > BOUNCE_BUFFER_SIZE) { + c->maximum = BOUNCE_BUFFER_SIZE; + pr_info_once("limiting nx842 maximum to %x\n", c->maximum); + } + + return 0; +} + +static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf) +{ + int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups); + + /* compress should have added space for header */ + if (s > be16_to_cpu(hdr->group[0].padding)) { + pr_err("Internal error: no space for header\n"); + return -EINVAL; + } + + memcpy(buf, hdr, s); + + print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0); + + return 0; +} + +static int compress(struct nx842_crypto_ctx *ctx, + struct nx842_crypto_param *p, + struct nx842_crypto_header_group *g, + struct nx842_constraints *c, + u16 *ignore, + unsigned int hdrsize) +{ + unsigned int slen = p->iremain, dlen = p->oremain, tmplen; + unsigned int adj_slen = slen; + u8 *src = p->in, *dst = p->out; + int ret, dskip = 0; + ktime_t timeout; + + if (p->iremain == 0) + return -EOVERFLOW; + + if (p->oremain == 0 || hdrsize + c->minimum > dlen) + return -ENOSPC; + + if (slen % c->multiple) + adj_slen = round_up(slen, c->multiple); + if (slen < c->minimum) + adj_slen = c->minimum; + if (slen > c->maximum) + adj_slen = slen = c->maximum; + if (adj_slen > slen || (u64)src % c->alignment) { 
+ adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE); + slen = min(slen, BOUNCE_BUFFER_SIZE); + if (adj_slen > slen) + memset(ctx->sbounce + slen, 0, adj_slen - slen); + memcpy(ctx->sbounce, src, slen); + src = ctx->sbounce; + slen = adj_slen; + pr_debug("using comp sbounce buffer, len %x\n", slen); + } + + dst += hdrsize; + dlen -= hdrsize; + + if ((u64)dst % c->alignment) { + dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst); + dst += dskip; + dlen -= dskip; + } + if (dlen % c->multiple) + dlen = round_down(dlen, c->multiple); + if (dlen < c->minimum) { +nospc: + dst = ctx->dbounce; + dlen = min(p->oremain, BOUNCE_BUFFER_SIZE); + dlen = round_down(dlen, c->multiple); + dskip = 0; + pr_debug("using comp dbounce buffer, len %x\n", dlen); + } + if (dlen > c->maximum) + dlen = c->maximum; + + tmplen = dlen; + timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT); + do { + dlen = tmplen; /* reset dlen, if we're retrying */ + ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem); + /* possibly we should reduce the slen here, instead of + * retrying with the dbounce buffer? 
+ */ + if (ret == -ENOSPC && dst != ctx->dbounce) + goto nospc; + } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); + if (ret) + return ret; + + dskip += hdrsize; + + if (dst == ctx->dbounce) + memcpy(p->out + dskip, dst, dlen); + + g->padding = cpu_to_be16(dskip); + g->compressed_length = cpu_to_be32(dlen); + g->uncompressed_length = cpu_to_be32(slen); + + if (p->iremain < slen) { + *ignore = slen - p->iremain; + slen = p->iremain; + } + + pr_debug("compress slen %x ignore %x dlen %x padding %x\n", + slen, *ignore, dlen, dskip); + + return update_param(p, slen, dskip + dlen); +} + +static int nx842_crypto_compress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + struct nx842_crypto_header *hdr = &ctx->header; + struct nx842_crypto_param p; + struct nx842_constraints c; + unsigned int groups, hdrsize, h; + int ret, n; + bool add_header; + u16 ignore = 0; + + p.in = (u8 *)src; + p.iremain = slen; + p.out = dst; + p.oremain = *dlen; + p.ototal = 0; + + *dlen = 0; + + ret = read_constraints(&c); + if (ret) + return ret; + + groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX, + DIV_ROUND_UP(p.iremain, c.maximum)); + hdrsize = NX842_CRYPTO_HEADER_SIZE(groups); + + /* skip adding header if the buffers meet all constraints */ + add_header = (p.iremain % c.multiple || + p.iremain < c.minimum || + p.iremain > c.maximum || + (u64)p.in % c.alignment || + p.oremain % c.multiple || + p.oremain < c.minimum || + p.oremain > c.maximum || + (u64)p.out % c.alignment); + + hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC); + hdr->groups = 0; + hdr->ignore = 0; + + while (p.iremain > 0) { + n = hdr->groups++; + if (hdr->groups > NX842_CRYPTO_GROUP_MAX) + return -ENOSPC; + + /* header goes before first group */ + h = !n && add_header ? 
hdrsize : 0; + + if (ignore) + pr_warn("interal error, ignore is set %x\n", ignore); + + ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h); + if (ret) + return ret; + } + + if (!add_header && hdr->groups > 1) { + pr_err("Internal error: No header but multiple groups\n"); + return -EINVAL; + } + + /* ignore indicates the input stream needed to be padded */ + hdr->ignore = cpu_to_be16(ignore); + if (ignore) + pr_debug("marked %d bytes as ignore\n", ignore); + + if (add_header) + ret = nx842_crypto_add_header(hdr, dst); + if (ret) + return ret; + + *dlen = p.ototal; + + pr_debug("compress total slen %x dlen %x\n", slen, *dlen); + + return 0; +} + +static int decompress(struct nx842_crypto_ctx *ctx, + struct nx842_crypto_param *p, + struct nx842_crypto_header_group *g, + struct nx842_constraints *c, + u16 ignore, + bool usehw) +{ + unsigned int slen = be32_to_cpu(g->compressed_length); + unsigned int required_len = be32_to_cpu(g->uncompressed_length); + unsigned int dlen = p->oremain, tmplen; + unsigned int adj_slen = slen; + u8 *src = p->in, *dst = p->out; + u16 padding = be16_to_cpu(g->padding); + int ret, spadding = 0, dpadding = 0; + ktime_t timeout; + + if (!slen || !required_len) + return -EINVAL; + + if (p->iremain <= 0 || padding + slen > p->iremain) + return -EOVERFLOW; + + if (p->oremain <= 0 || required_len - ignore > p->oremain) + return -ENOSPC; + + src += padding; + + if (!usehw) + goto usesw; + + if (slen % c->multiple) + adj_slen = round_up(slen, c->multiple); + if (slen < c->minimum) + adj_slen = c->minimum; + if (slen > c->maximum) + goto usesw; + if (slen < adj_slen || (u64)src % c->alignment) { + /* we can append padding bytes because the 842 format defines + * an "end" template (see lib/842/842_decompress.c) and will + * ignore any bytes following it. 
+ */ + if (slen < adj_slen) + memset(ctx->sbounce + slen, 0, adj_slen - slen); + memcpy(ctx->sbounce, src, slen); + src = ctx->sbounce; + spadding = adj_slen - slen; + slen = adj_slen; + pr_debug("using decomp sbounce buffer, len %x\n", slen); + } + + if (dlen % c->multiple) + dlen = round_down(dlen, c->multiple); + if (dlen < required_len || (u64)dst % c->alignment) { + dst = ctx->dbounce; + dlen = min(required_len, BOUNCE_BUFFER_SIZE); + pr_debug("using decomp dbounce buffer, len %x\n", dlen); + } + if (dlen < c->minimum) + goto usesw; + if (dlen > c->maximum) + dlen = c->maximum; + + tmplen = dlen; + timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT); + do { + dlen = tmplen; /* reset dlen, if we're retrying */ + ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem); + } while (ret == -EBUSY && ktime_before(ktime_get(), timeout)); + if (ret) { +usesw: + /* reset everything, sw doesn't have constraints */ + src = p->in + padding; + slen = be32_to_cpu(g->compressed_length); + spadding = 0; + dst = p->out; + dlen = p->oremain; + dpadding = 0; + if (dlen < required_len) { /* have ignore bytes */ + dst = ctx->dbounce; + dlen = BOUNCE_BUFFER_SIZE; + } + pr_info_ratelimited("using software 842 decompression\n"); + ret = sw842_decompress(src, slen, dst, &dlen); + } + if (ret) + return ret; + + slen -= spadding; + + dlen -= ignore; + if (ignore) + pr_debug("ignoring last %x bytes\n", ignore); + + if (dst == ctx->dbounce) + memcpy(p->out, dst, dlen); + + pr_debug("decompress slen %x padding %x dlen %x ignore %x\n", + slen, padding, dlen, ignore); + + return update_param(p, slen + padding, dlen); +} + +static int nx842_crypto_decompress(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen) +{ + struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); + struct nx842_crypto_header *hdr; + struct nx842_crypto_param p; + struct nx842_constraints c; + int n, ret, hdr_len; + u16 ignore = 0; + bool usehw = true; + + p.in = (u8 *)src; + 
p.iremain = slen; + p.out = dst; + p.oremain = *dlen; + p.ototal = 0; + + *dlen = 0; + + if (read_constraints(&c)) + usehw = false; + + hdr = (struct nx842_crypto_header *)src; + + /* If it doesn't start with our header magic number, assume it's a raw + * 842 compressed buffer and pass it directly to the hardware driver + */ + if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) { + struct nx842_crypto_header_group g = { + .padding = 0, + .compressed_length = cpu_to_be32(p.iremain), + .uncompressed_length = cpu_to_be32(p.oremain), + }; + + ret = decompress(ctx, &p, &g, &c, 0, usehw); + if (ret) + return ret; + + *dlen = p.ototal; + + return 0; + } + + if (!hdr->groups) { + pr_err("header has no groups\n"); + return -EINVAL; + } + if (hdr->groups > NX842_CRYPTO_GROUP_MAX) { + pr_err("header has too many groups %x, max %x\n", + hdr->groups, NX842_CRYPTO_GROUP_MAX); + return -EINVAL; + } + + hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups); + if (hdr_len > slen) + return -EOVERFLOW; + + memcpy(&ctx->header, src, hdr_len); + hdr = &ctx->header; + + for (n = 0; n < hdr->groups; n++) { + /* ignore applies to last group */ + if (n + 1 == hdr->groups) + ignore = be16_to_cpu(hdr->ignore); + + ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw); + if (ret) + return ret; + } + + *dlen = p.ototal; + + pr_debug("decompress total slen %x dlen %x\n", slen, *dlen); + + return 0; +} + +static struct crypto_alg alg = { + .cra_name = "842", + .cra_driver_name = "842-nx", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct nx842_crypto_ctx), + .cra_module = THIS_MODULE, + .cra_init = nx842_crypto_init, + .cra_exit = nx842_crypto_exit, + .cra_u = { .compress = { + .coa_compress = nx842_crypto_compress, + .coa_decompress = nx842_crypto_decompress } } +}; + +static int __init nx842_crypto_mod_init(void) +{ + return crypto_register_alg(&alg); +} +module_init(nx842_crypto_mod_init); + +static void __exit nx842_crypto_mod_exit(void) +{ + 
crypto_unregister_alg(&alg); +} +module_exit(nx842_crypto_mod_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface"); +MODULE_ALIAS_CRYPTO("842"); +MODULE_ALIAS_CRYPTO("842-nx"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c new file mode 100644 index 0000000..6a9fb8b --- /dev/null +++ b/drivers/crypto/nx/nx-842-powernv.c @@ -0,0 +1,625 @@ +/* + * Driver for IBM PowerNV 842 compression accelerator + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "nx-842.h" + +#include <linux/timer.h> + +#include <asm/prom.h> +#include <asm/icswx.h> + +#define MODULE_NAME NX842_POWERNV_MODULE_NAME +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors"); + +#define WORKMEM_ALIGN (CRB_ALIGN) +#define CSB_WAIT_MAX (5000) /* ms */ + +struct nx842_workmem { + /* Below fields must be properly aligned */ + struct coprocessor_request_block crb; /* CRB_ALIGN align */ + struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */ + struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */ + /* Above fields must be properly aligned */ + + ktime_t start; + + char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */ +} __packed __aligned(WORKMEM_ALIGN); + +struct nx842_coproc { + unsigned int chip_id; + unsigned int ct; + unsigned int ci; + struct list_head list; +}; + +/* no cpu hotplug on powernv, so this list never changes after init */ +static LIST_HEAD(nx842_coprocs); +static unsigned int nx842_ct; + +/** + * setup_indirect_dde - Setup an indirect DDE + * + * The DDE is setup with the the DDE count, byte count, and address of + * first direct DDE in the list. + */ +static void setup_indirect_dde(struct data_descriptor_entry *dde, + struct data_descriptor_entry *ddl, + unsigned int dde_count, unsigned int byte_count) +{ + dde->flags = 0; + dde->count = dde_count; + dde->index = 0; + dde->length = cpu_to_be32(byte_count); + dde->address = cpu_to_be64(nx842_get_pa(ddl)); +} + +/** + * setup_direct_dde - Setup single DDE from buffer + * + * The DDE is setup with the buffer and length. The buffer must be properly + * aligned. The used length is returned. 
+ * Returns: + * N Successfully set up DDE with N bytes + */ +static unsigned int setup_direct_dde(struct data_descriptor_entry *dde, + unsigned long pa, unsigned int len) +{ + unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa)); + + dde->flags = 0; + dde->count = 0; + dde->index = 0; + dde->length = cpu_to_be32(l); + dde->address = cpu_to_be64(pa); + + return l; +} + +/** + * setup_ddl - Setup DDL from buffer + * + * Returns: + * 0 Successfully set up DDL + */ +static int setup_ddl(struct data_descriptor_entry *dde, + struct data_descriptor_entry *ddl, + unsigned char *buf, unsigned int len, + bool in) +{ + unsigned long pa = nx842_get_pa(buf); + int i, ret, total_len = len; + + if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) { + pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n", + in ? "input" : "output", pa, DDE_BUFFER_ALIGN); + return -EINVAL; + } + + /* only need to check last mult; since buffer must be + * DDE_BUFFER_ALIGN aligned, and that is a multiple of + * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers + * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT. + */ + if (len % DDE_BUFFER_LAST_MULT) { + pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n", + in ? "input" : "output", len, DDE_BUFFER_LAST_MULT); + if (in) + return -EINVAL; + len = round_down(len, DDE_BUFFER_LAST_MULT); + } + + /* use a single direct DDE */ + if (len <= LEN_ON_PAGE(pa)) { + ret = setup_direct_dde(dde, pa, len); + WARN_ON(ret < len); + return 0; + } + + /* use the DDL */ + for (i = 0; i < DDL_LEN_MAX && len > 0; i++) { + ret = setup_direct_dde(&ddl[i], pa, len); + buf += ret; + len -= ret; + pa = nx842_get_pa(buf); + } + + if (len > 0) { + pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n", + total_len, in ? "input" : "output", len); + if (in) + return -EMSGSIZE; + total_len -= len; + } + setup_indirect_dde(dde, ddl, i, total_len); + + return 0; +} + +#define CSB_ERR(csb, msg, ...) 
\ + pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n", \ + ##__VA_ARGS__, (csb)->flags, \ + (csb)->cs, (csb)->cc, (csb)->ce, \ + be32_to_cpu((csb)->count)) + +#define CSB_ERR_ADDR(csb, msg, ...) \ + CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__, \ + (unsigned long)be64_to_cpu((csb)->address)) + +/** + * wait_for_csb + */ +static int wait_for_csb(struct nx842_workmem *wmem, + struct coprocessor_status_block *csb) +{ + ktime_t start = wmem->start, now = ktime_get(); + ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX); + + while (!(ACCESS_ONCE(csb->flags) & CSB_V)) { + cpu_relax(); + now = ktime_get(); + if (ktime_after(now, timeout)) + break; + } + + /* hw has updated csb and output buffer */ + barrier(); + + /* check CSB flags */ + if (!(csb->flags & CSB_V)) { + CSB_ERR(csb, "CSB still not valid after %ld us, giving up", + (long)ktime_us_delta(now, start)); + return -ETIMEDOUT; + } + if (csb->flags & CSB_F) { + CSB_ERR(csb, "Invalid CSB format"); + return -EPROTO; + } + if (csb->flags & CSB_CH) { + CSB_ERR(csb, "Invalid CSB chaining state"); + return -EPROTO; + } + + /* verify CSB completion sequence is 0 */ + if (csb->cs) { + CSB_ERR(csb, "Invalid CSB completion sequence"); + return -EPROTO; + } + + /* check CSB Completion Code */ + switch (csb->cc) { + /* no error */ + case CSB_CC_SUCCESS: + break; + case CSB_CC_TPBC_GT_SPBC: + /* not an error, but the compressed data is + * larger than the uncompressed data :( + */ + break; + + /* input data errors */ + case CSB_CC_OPERAND_OVERLAP: + /* input and output buffers overlap */ + CSB_ERR(csb, "Operand Overlap error"); + return -EINVAL; + case CSB_CC_INVALID_OPERAND: + CSB_ERR(csb, "Invalid operand"); + return -EINVAL; + case CSB_CC_NOSPC: + /* output buffer too small */ + return -ENOSPC; + case CSB_CC_ABORT: + CSB_ERR(csb, "Function aborted"); + return -EINTR; + case CSB_CC_CRC_MISMATCH: + CSB_ERR(csb, "CRC mismatch"); + return -EINVAL; + case CSB_CC_TEMPL_INVALID: + CSB_ERR(csb, "Compressed data template invalid"); + 
return -EINVAL; + case CSB_CC_TEMPL_OVERFLOW: + CSB_ERR(csb, "Compressed data template shows data past end"); + return -EINVAL; + + /* these should not happen */ + case CSB_CC_INVALID_ALIGN: + /* setup_ddl should have detected this */ + CSB_ERR_ADDR(csb, "Invalid alignment"); + return -EINVAL; + case CSB_CC_DATA_LENGTH: + /* setup_ddl should have detected this */ + CSB_ERR(csb, "Invalid data length"); + return -EINVAL; + case CSB_CC_WR_TRANSLATION: + case CSB_CC_TRANSLATION: + case CSB_CC_TRANSLATION_DUP1: + case CSB_CC_TRANSLATION_DUP2: + case CSB_CC_TRANSLATION_DUP3: + case CSB_CC_TRANSLATION_DUP4: + case CSB_CC_TRANSLATION_DUP5: + case CSB_CC_TRANSLATION_DUP6: + /* should not happen, we use physical addrs */ + CSB_ERR_ADDR(csb, "Translation error"); + return -EPROTO; + case CSB_CC_WR_PROTECTION: + case CSB_CC_PROTECTION: + case CSB_CC_PROTECTION_DUP1: + case CSB_CC_PROTECTION_DUP2: + case CSB_CC_PROTECTION_DUP3: + case CSB_CC_PROTECTION_DUP4: + case CSB_CC_PROTECTION_DUP5: + case CSB_CC_PROTECTION_DUP6: + /* should not happen, we use physical addrs */ + CSB_ERR_ADDR(csb, "Protection error"); + return -EPROTO; + case CSB_CC_PRIVILEGE: + /* shouldn't happen, we're in HYP mode */ + CSB_ERR(csb, "Insufficient Privilege error"); + return -EPROTO; + case CSB_CC_EXCESSIVE_DDE: + /* shouldn't happen, setup_ddl doesn't use many dde's */ + CSB_ERR(csb, "Too many DDEs in DDL"); + return -EINVAL; + case CSB_CC_TRANSPORT: + /* shouldn't happen, we setup CRB correctly */ + CSB_ERR(csb, "Invalid CRB"); + return -EINVAL; + case CSB_CC_SEGMENTED_DDL: + /* shouldn't happen, setup_ddl creates DDL right */ + CSB_ERR(csb, "Segmented DDL error"); + return -EINVAL; + case CSB_CC_DDE_OVERFLOW: + /* shouldn't happen, setup_ddl creates DDL right */ + CSB_ERR(csb, "DDE overflow error"); + return -EINVAL; + case CSB_CC_SESSION: + /* should not happen with ICSWX */ + CSB_ERR(csb, "Session violation error"); + return -EPROTO; + case CSB_CC_CHAIN: + /* should not happen, we don't use chained 
CRBs */ + CSB_ERR(csb, "Chained CRB error"); + return -EPROTO; + case CSB_CC_SEQUENCE: + /* should not happen, we don't use chained CRBs */ + CSB_ERR(csb, "CRB seqeunce number error"); + return -EPROTO; + case CSB_CC_UNKNOWN_CODE: + CSB_ERR(csb, "Unknown subfunction code"); + return -EPROTO; + + /* hardware errors */ + case CSB_CC_RD_EXTERNAL: + case CSB_CC_RD_EXTERNAL_DUP1: + case CSB_CC_RD_EXTERNAL_DUP2: + case CSB_CC_RD_EXTERNAL_DUP3: + CSB_ERR_ADDR(csb, "Read error outside coprocessor"); + return -EPROTO; + case CSB_CC_WR_EXTERNAL: + CSB_ERR_ADDR(csb, "Write error outside coprocessor"); + return -EPROTO; + case CSB_CC_INTERNAL: + CSB_ERR(csb, "Internal error in coprocessor"); + return -EPROTO; + case CSB_CC_PROVISION: + CSB_ERR(csb, "Storage provision error"); + return -EPROTO; + case CSB_CC_HW: + CSB_ERR(csb, "Correctable hardware error"); + return -EPROTO; + + default: + CSB_ERR(csb, "Invalid CC %d", csb->cc); + return -EPROTO; + } + + /* check Completion Extension state */ + if (csb->ce & CSB_CE_TERMINATION) { + CSB_ERR(csb, "CSB request was terminated"); + return -EPROTO; + } + if (csb->ce & CSB_CE_INCOMPLETE) { + CSB_ERR(csb, "CSB request not complete"); + return -EPROTO; + } + if (!(csb->ce & CSB_CE_TPBC)) { + CSB_ERR(csb, "TPBC not provided, unknown target length"); + return -EPROTO; + } + + /* successful completion */ + pr_debug_ratelimited("Processed %u bytes in %lu us\n", csb->count, + (unsigned long)ktime_us_delta(now, start)); + + return 0; +} + +/** + * nx842_powernv_function - compress/decompress data using the 842 algorithm + * + * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems. + * This compresses or decompresses the provided input buffer into the provided + * output buffer. + * + * Upon return from this function @outlen contains the length of the + * output data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. 
+ * + * The @workmem buffer should only be used by one function call at a time. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS + * @fc: function code, see CCW Function Codes in nx-842.h + * + * Returns: + * 0 Success, output of length @outlenp stored in the buffer at @out + * -ENODEV Hardware unavailable + * -ENOSPC Output buffer is to small + * -EMSGSIZE Input buffer too large + * -EINVAL buffer constraints do not fix nx842_constraints + * -EPROTO hardware error during operation + * -ETIMEDOUT hardware did not complete operation in reasonable time + * -EINTR operation was aborted + */ +static int nx842_powernv_function(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *workmem, int fc) +{ + struct coprocessor_request_block *crb; + struct coprocessor_status_block *csb; + struct nx842_workmem *wmem; + int ret; + u64 csb_addr; + u32 ccw; + unsigned int outlen = *outlenp; + + wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN); + + *outlenp = 0; + + /* shoudn't happen, we don't load without a coproc */ + if (!nx842_ct) { + pr_err_ratelimited("coprocessor CT is 0"); + return -ENODEV; + } + + crb = &wmem->crb; + csb = &crb->csb; + + /* Clear any previous values */ + memset(crb, 0, sizeof(*crb)); + + /* set up DDLs */ + ret = setup_ddl(&crb->source, wmem->ddl_in, + (unsigned char *)in, inlen, true); + if (ret) + return ret; + ret = setup_ddl(&crb->target, wmem->ddl_out, + out, outlen, false); + if (ret) + return ret; + + /* set up CCW */ + ccw = 0; + ccw = SET_FIELD(ccw, CCW_CT, nx842_ct); + ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */ + ccw = SET_FIELD(ccw, CCW_FC_842, fc); + + /* set up CRB's CSB addr */ + csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS; + csb_addr |= CRB_CSB_AT; /* Addrs are phys */ + crb->csb_addr = cpu_to_be64(csb_addr); 
+ + wmem->start = ktime_get(); + + /* do ICSWX */ + ret = icswx(cpu_to_be32(ccw), crb); + + pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret, + (unsigned int)ccw, + (unsigned int)be32_to_cpu(crb->ccw)); + + switch (ret) { + case ICSWX_INITIATED: + ret = wait_for_csb(wmem, csb); + break; + case ICSWX_BUSY: + pr_debug_ratelimited("842 Coprocessor busy\n"); + ret = -EBUSY; + break; + case ICSWX_REJECTED: + pr_err_ratelimited("ICSWX rejected\n"); + ret = -EPROTO; + break; + default: + pr_err_ratelimited("Invalid ICSWX return code %x\n", ret); + ret = -EPROTO; + break; + } + + if (!ret) + *outlenp = be32_to_cpu(csb->count); + + return ret; +} + +/** + * nx842_powernv_compress - Compress data using the 842 algorithm + * + * Compression provided by the NX842 coprocessor on IBM PowerNV systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS + * + * Returns: see @nx842_powernv_function() + */ +static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *wmem) +{ + return nx842_powernv_function(in, inlen, out, outlenp, + wmem, CCW_FC_842_COMP_NOCRC); +} + +/** + * nx842_powernv_decompress - Decompress data using the 842 algorithm + * + * Decompression provided by the NX842 coprocessor on IBM PowerNV systems. + * The input buffer is decompressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * decompressed data. 
If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. + * + * @in: input buffer pointer + * @inlen: input buffer size + * @out: output buffer pointer + * @outlenp: output buffer size pointer + * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS + * + * Returns: see @nx842_powernv_function() + */ +static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlenp, + void *wmem) +{ + return nx842_powernv_function(in, inlen, out, outlenp, + wmem, CCW_FC_842_DECOMP_NOCRC); +} + +static int __init nx842_powernv_probe(struct device_node *dn) +{ + struct nx842_coproc *coproc; + struct property *ct_prop, *ci_prop; + unsigned int ct, ci; + int chip_id; + + chip_id = of_get_ibm_chip_id(dn); + if (chip_id < 0) { + pr_err("ibm,chip-id missing\n"); + return -EINVAL; + } + ct_prop = of_find_property(dn, "ibm,842-coprocessor-type", NULL); + if (!ct_prop) { + pr_err("ibm,842-coprocessor-type missing\n"); + return -EINVAL; + } + ct = be32_to_cpu(*(unsigned int *)ct_prop->value); + ci_prop = of_find_property(dn, "ibm,842-coprocessor-instance", NULL); + if (!ci_prop) { + pr_err("ibm,842-coprocessor-instance missing\n"); + return -EINVAL; + } + ci = be32_to_cpu(*(unsigned int *)ci_prop->value); + + coproc = kmalloc(sizeof(*coproc), GFP_KERNEL); + if (!coproc) + return -ENOMEM; + + coproc->chip_id = chip_id; + coproc->ct = ct; + coproc->ci = ci; + INIT_LIST_HEAD(&coproc->list); + list_add(&coproc->list, &nx842_coprocs); + + pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci); + + if (!nx842_ct) + nx842_ct = ct; + else if (nx842_ct != ct) + pr_err("NX842 chip %d, CT %d != first found CT %d\n", + chip_id, ct, nx842_ct); + + return 0; +} + +static struct nx842_constraints nx842_powernv_constraints = { + .alignment = DDE_BUFFER_ALIGN, + .multiple = DDE_BUFFER_LAST_MULT, + .minimum = DDE_BUFFER_LAST_MULT, + .maximum = 
(DDL_LEN_MAX - 1) * PAGE_SIZE, +}; + +static struct nx842_driver nx842_powernv_driver = { + .owner = THIS_MODULE, + .constraints = &nx842_powernv_constraints, + .compress = nx842_powernv_compress, + .decompress = nx842_powernv_decompress, +}; + +static __init int nx842_powernv_init(void) +{ + struct device_node *dn; + + /* verify workmem size/align restrictions */ + BUILD_BUG_ON(sizeof(struct nx842_workmem) > NX842_MEM_COMPRESS); + BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN); + BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN); + BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN); + /* verify buffer size/align restrictions */ + BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN); + BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT); + BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT); + + pr_info("loading\n"); + + for_each_compatible_node(dn, NULL, NX842_POWERNV_COMPAT_NAME) + nx842_powernv_probe(dn); + + if (!nx842_ct) { + pr_err("no coprocessors found\n"); + return -ENODEV; + } + + nx842_register_driver(&nx842_powernv_driver); + + pr_info("loaded\n"); + + return 0; +} +module_init(nx842_powernv_init); + +static void __exit nx842_powernv_exit(void) +{ + struct nx842_coproc *coproc, *n; + + nx842_unregister_driver(&nx842_powernv_driver); + + list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) { + list_del(&coproc->list); + kfree(coproc); + } + + pr_info("unloaded\n"); +} +module_exit(nx842_powernv_exit); diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c new file mode 100644 index 0000000..85837e9 --- /dev/null +++ b/drivers/crypto/nx/nx-842-pseries.c @@ -0,0 +1,1128 @@ +/* + * Driver for IBM Power 842 compression accelerator + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> + * Seth Jennings <sjenning@linux.vnet.ibm.com> + */ + +#include <asm/vio.h> + +#include "nx-842.h" +#include "nx_csbcpb.h" /* struct nx_csbcpb */ + +#define MODULE_NAME NX842_PSERIES_MODULE_NAME +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); +MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); + +/* IO buffer must be 128 byte aligned */ +#define IO_BUFFER_ALIGN 128 + +static struct nx842_constraints nx842_pseries_constraints = { + .alignment = IO_BUFFER_ALIGN, + .multiple = DDE_BUFFER_LAST_MULT, + .minimum = IO_BUFFER_ALIGN, + .maximum = PAGE_SIZE, /* dynamic, max_sync_size */ +}; + +static int check_constraints(unsigned long buf, unsigned int *len, bool in) +{ + if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) { + pr_debug("%s buffer 0x%lx not aligned to 0x%x\n", + in ? "input" : "output", buf, + nx842_pseries_constraints.alignment); + return -EINVAL; + } + if (*len % nx842_pseries_constraints.multiple) { + pr_debug("%s buffer len 0x%x not multiple of 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.multiple); + if (in) + return -EINVAL; + *len = round_down(*len, nx842_pseries_constraints.multiple); + } + if (*len < nx842_pseries_constraints.minimum) { + pr_debug("%s buffer len 0x%x under minimum 0x%x\n", + in ? 
"input" : "output", *len, + nx842_pseries_constraints.minimum); + return -EINVAL; + } + if (*len > nx842_pseries_constraints.maximum) { + pr_debug("%s buffer len 0x%x over maximum 0x%x\n", + in ? "input" : "output", *len, + nx842_pseries_constraints.maximum); + if (in) + return -EINVAL; + *len = nx842_pseries_constraints.maximum; + } + return 0; +} + +/* I assume we need to align the CSB? */ +#define WORKMEM_ALIGN (256) + +struct nx842_workmem { + /* scatterlist */ + char slin[4096]; + char slout[4096]; + /* coprocessor status/parameter block */ + struct nx_csbcpb csbcpb; + + char padding[WORKMEM_ALIGN]; +} __aligned(WORKMEM_ALIGN); + +/* Macros for fields within nx_csbcpb */ +/* Check the valid bit within the csbcpb valid field */ +#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) + +/* CE macros operate on the completion_extension field bits in the csbcpb. + * CE0 0=full completion, 1=partial completion + * CE1 0=CE0 indicates completion, 1=termination (output may be modified) + * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ +#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) +#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) +#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) + +/* The NX unit accepts data only on 4K page boundaries */ +#define NX842_HW_PAGE_SIZE (4096) +#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) + +enum nx842_status { + UNAVAILABLE, + AVAILABLE +}; + +struct ibm_nx842_counters { + atomic64_t comp_complete; + atomic64_t comp_failed; + atomic64_t decomp_complete; + atomic64_t decomp_failed; + atomic64_t swdecomp; + atomic64_t comp_times[32]; + atomic64_t decomp_times[32]; +}; + +static struct nx842_devdata { + struct vio_dev *vdev; + struct device *dev; + struct ibm_nx842_counters *counters; + unsigned int max_sg_len; + unsigned int max_sync_size; + unsigned int max_sync_sg; + enum nx842_status status; +} __rcu *devdata; +static DEFINE_SPINLOCK(devdata_mutex); + +#define NX842_COUNTER_INC(_x) \ +static inline void 
nx842_inc_##_x( \ + const struct nx842_devdata *dev) { \ + if (dev) \ + atomic64_inc(&dev->counters->_x); \ +} +NX842_COUNTER_INC(comp_complete); +NX842_COUNTER_INC(comp_failed); +NX842_COUNTER_INC(decomp_complete); +NX842_COUNTER_INC(decomp_failed); +NX842_COUNTER_INC(swdecomp); + +#define NX842_HIST_SLOTS 16 + +/* + * Count one operation in the @times histogram. The bucket index is + * fls(@time) - 1, clamped to NX842_HIST_SLOTS - 1; a @time of 0 is + * counted in bucket 0. + */ +static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) +{ + int bucket = fls(time); + + if (bucket) + bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); + + atomic64_inc(&times[bucket]); +} + +/* NX unit operation flags */ +#define NX842_OP_COMPRESS 0x0 +#define NX842_OP_CRC 0x1 +#define NX842_OP_DECOMPRESS 0x2 +#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) +#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) +#define NX842_OP_ASYNC (1<<23) +#define NX842_OP_NOTIFY (1<<22) +#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) + +static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) +{ + /* No use of DMA mappings within the driver. 
*/ + return 0; +} + +struct nx842_slentry { + unsigned long ptr; /* Real address (use __pa()) */ + unsigned long len; +}; + +/* pHyp scatterlist entry */ +struct nx842_scatterlist { + int entry_nr; /* number of slentries */ + struct nx842_slentry *entries; /* ptr to array of slentries */ +}; + +/* Does not include sizeof(entry_nr) in the size */ +static inline unsigned long nx842_get_scatterlist_size( + struct nx842_scatterlist *sl) +{ + return sl->entry_nr * sizeof(struct nx842_slentry); +} + +static int nx842_build_scatterlist(unsigned long buf, int len, + struct nx842_scatterlist *sl) +{ + unsigned long nextpage; + struct nx842_slentry *entry; + + sl->entry_nr = 0; + + entry = sl->entries; + while (len) { + entry->ptr = nx842_get_pa((void *)buf); + nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); + if (nextpage < buf + len) { + /* we aren't at the end yet */ + if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) + /* we are in the middle (or beginning) */ + entry->len = NX842_HW_PAGE_SIZE; + else + /* we are at the beginning */ + entry->len = nextpage - buf; + } else { + /* at the end */ + entry->len = len; + } + + len -= entry->len; + buf += entry->len; + sl->entry_nr++; + entry++; + } + + return 0; +} + +static int nx842_validate_result(struct device *dev, + struct cop_status_block *csb) +{ + /* The csb must be valid after returning from vio_h_cop_sync */ + if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { + dev_err(dev, "%s: cspcbp not valid upon completion.\n", + __func__); + dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", + csb->valid, + csb->crb_seq_number, + csb->completion_code, + csb->completion_extension); + dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", + csb->processed_byte_count, + (unsigned long)csb->address); + return -EIO; + } + + /* Check return values from the hardware in the CSB */ + switch (csb->completion_code) { + case 0: /* Completed without error */ + break; + case 64: /* Target bytes > Source bytes during compression */ + case 13: /* 
Output buffer too small */ + dev_dbg(dev, "%s: Compression output larger than input\n", + __func__); + return -ENOSPC; + case 66: /* Input data contains an illegal template field */ + case 67: /* Template indicates data past the end of the input stream */ + dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", + __func__, csb->completion_code); + return -EINVAL; + default: + dev_dbg(dev, "%s: Unspecified error (code:%d)\n", + __func__, csb->completion_code); + return -EIO; + } + + /* Hardware sanity check */ + if (!NX842_CSBCPB_CE2(csb->completion_extension)) { + dev_err(dev, "%s: No error returned by hardware, but " + "data returned is unusable, contact support.\n" + "(Additional info: csbcbp->processed bytes " + "does not specify processed bytes for the " + "target buffer.)\n", __func__); + return -EIO; + } + + return 0; +} + +/** + * nx842_pseries_compress - Compress data using the 842 algorithm + * + * Compression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is compressed and the result is stored in the + * provided output buffer. + * + * Upon return from this function @outlen contains the length of the + * compressed data. If there is an error then @outlen will be 0 and an + * error will be specified by the return code from this function. 
+ * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * NX842_MEM_COMPRESS + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EIO Internal error + * -ENODEV Hardware unavailable + */ +static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, max_sync_size; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + max_sync_size = local_devdata->max_sync_size; + dev = local_devdata->dev; + + /* Init scatterlist */ + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_COMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + op.out = nx842_get_pa(slout.entries); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void *)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + 
nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + ret = -EIO; + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = csbcpb->csb.processed_byte_count; + dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen); + +unlock: + if (ret) + nx842_inc_comp_failed(local_devdata); + else { + nx842_inc_comp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->comp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + rcu_read_unlock(); + return ret; +} + +/** + * nx842_pseries_decompress - Decompress data using the 842 algorithm + * + * Decompression provide by the NX842 coprocessor on IBM Power systems. + * The input buffer is decompressed and the result is stored in the + * provided output buffer. The size allocated to the output buffer is + * provided by the caller of this function in @outlen. Upon return from + * this function @outlen contains the length of the decompressed data. + * If there is an error then @outlen will be 0 and an error will be + * specified by the return code from this function. 
+ * + * @in: Pointer to input buffer + * @inlen: Length of input buffer + * @out: Pointer to output buffer + * @outlen: Length of output buffer + * @wrkmem: ptr to buffer for working memory, size determined by + * NX842_MEM_COMPRESS + * + * Returns: + * 0 Success, output of length @outlen stored in the buffer at @out + * -ENODEV Hardware decompression device is unavailable + * -ENOMEM Unable to allocate internal buffers + * -ENOSPC Output buffer is to small + * -EINVAL Bad input data encountered when attempting decompress + * -EIO Internal error + */ +static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen, + unsigned char *out, unsigned int *outlen, + void *wmem) +{ + struct nx842_devdata *local_devdata; + struct device *dev = NULL; + struct nx842_workmem *workmem; + struct nx842_scatterlist slin, slout; + struct nx_csbcpb *csbcpb; + int ret = 0, max_sync_size; + unsigned long inbuf, outbuf; + struct vio_pfo_op op = { + .done = NULL, + .handle = 0, + .timeout = 0, + }; + unsigned long start = get_tb(); + + /* Ensure page alignment and size */ + inbuf = (unsigned long)in; + if (check_constraints(inbuf, &inlen, true)) + return -EINVAL; + + outbuf = (unsigned long)out; + if (check_constraints(outbuf, outlen, false)) + return -EINVAL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata || !local_devdata->dev) { + rcu_read_unlock(); + return -ENODEV; + } + max_sync_size = local_devdata->max_sync_size; + dev = local_devdata->dev; + + workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN); + + /* Init scatterlist */ + slin.entries = (struct nx842_slentry *)workmem->slin; + slout.entries = (struct nx842_slentry *)workmem->slout; + + /* Init operation */ + op.flags = NX842_OP_DECOMPRESS; + csbcpb = &workmem->csbcpb; + memset(csbcpb, 0, sizeof(*csbcpb)); + op.csbcpb = nx842_get_pa(csbcpb); + + if ((inbuf & NX842_HW_PAGE_MASK) == + ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.in = nx842_get_pa((void 
*)inbuf); + op.inlen = inlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(inbuf, inlen, &slin); + op.in = nx842_get_pa(slin.entries); + op.inlen = -nx842_get_scatterlist_size(&slin); + } + + if ((outbuf & NX842_HW_PAGE_MASK) == + ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) { + /* Create direct DDE */ + op.out = nx842_get_pa((void *)outbuf); + op.outlen = *outlen; + } else { + /* Create indirect DDE (scatterlist) */ + nx842_build_scatterlist(outbuf, *outlen, &slout); + op.out = nx842_get_pa(slout.entries); + op.outlen = -nx842_get_scatterlist_size(&slout); + } + + /* Send request to pHyp */ + ret = vio_h_cop_sync(local_devdata->vdev, &op); + + /* Check for pHyp error */ + if (ret) { + dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", + __func__, ret, op.hcall_err); + goto unlock; + } + + /* Check for hardware error */ + ret = nx842_validate_result(dev, &csbcpb->csb); + if (ret) + goto unlock; + + *outlen = csbcpb->csb.processed_byte_count; + +unlock: + if (ret) + /* decompress fail */ + nx842_inc_decomp_failed(local_devdata); + else { + nx842_inc_decomp_complete(local_devdata); + ibm_nx842_incr_hist(local_devdata->counters->decomp_times, + (get_tb() - start) / tb_ticks_per_usec); + } + + rcu_read_unlock(); + return ret; +} + +/** + * nx842_OF_set_defaults -- Set default (disabled) values for devdata + * + * @devdata - struct nx842_devdata to update + * + * Returns: + * 0 on success + * -ENOENT if @devdata ptr is NULL + */ +static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +{ + if (devdata) { + devdata->max_sync_size = 0; + devdata->max_sync_sg = 0; + devdata->max_sg_len = 0; + devdata->status = UNAVAILABLE; + return 0; + } else + return -ENOENT; +} + +/** + * nx842_OF_upd_status -- Update the device info from OF status prop + * + * The status property indicates if the accelerator is enabled. If the + * device is in the OF tree it indicates that the hardware is present. 
+ * The status field indicates if the device is enabled when the status + * is 'okay'. Otherwise the device driver will be disabled. + * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 - Device is available + * -EINVAL - Device is not available + */ +static int nx842_OF_upd_status(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const char *status = (const char *)prop->value; + + if (!strncmp(status, "okay", (size_t)prop->length)) { + devdata->status = AVAILABLE; + } else { + dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", + __func__, status); + devdata->status = UNAVAILABLE; + } + + return ret; +} + +/** + * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop + * + * Definition of the 'ibm,max-sg-len' OF property: + * This field indicates the maximum byte length of a scatter list + * for the platform facility. It is a single cell encoded as with encode-int. + * + * Example: + * # od -x ibm,max-sg-len + * 0000000 0000 0ff0 + * + * In this example, the maximum byte length of a scatter list is + * 0x0ff0 (4,080). 
+ * + * @devdata - struct nx842_devdata to update + * @prop - struct property pointer containing the maxsglen for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const int *maxsglen = prop->value; + + if (prop->length != sizeof(*maxsglen)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, + prop->length, sizeof(*maxsglen)); + ret = -EINVAL; + } else { + devdata->max_sg_len = (unsigned int)min(*maxsglen, + (int)NX842_HW_PAGE_SIZE); + } + + return ret; +} + +/** + * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop + * + * Definition of the 'ibm,max-sync-cop' OF property: + * Two series of cells. The first series of cells represents the maximums + * that can be synchronously compressed. The second series of cells + * represents the maximums that can be synchronously decompressed. + * 1. The first cell in each series contains the count of the number of + * data length, scatter list elements pairs that follow – each being + * of the form + * a. One cell data byte length + * b. One cell total number of scatter list elements + * + * Example: + * # od -x ibm,max-sync-cop + * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 + * 0000020 0000 1000 0000 01fe + * + * In this example, compression supports 0x1000 (4,096) data byte length + * and 0x1fe (510) total scatter list elements. Decompression supports + * 0x1000 (4,096) data byte length and 0x1fe (510) total scatter list + * elements. 
+ * + * @devdata - struct nx842_devdata to update + * @prop - struct property point containing the maxsyncop for the update + * + * Returns: + * 0 on success + * -EINVAL on failure + */ +static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, + struct property *prop) { + int ret = 0; + const struct maxsynccop_t { + int comp_elements; + int comp_data_limit; + int comp_sg_limit; + int decomp_elements; + int decomp_data_limit; + int decomp_sg_limit; + } *maxsynccop; + + if (prop->length != sizeof(*maxsynccop)) { + dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); + dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, + sizeof(*maxsynccop)); + ret = -EINVAL; + goto out; + } + + maxsynccop = (const struct maxsynccop_t *)prop->value; + + /* Use one limit rather than separate limits for compression and + * decompression. Set a maximum for this so as not to exceed the + * size that the header can support and round the value down to + * the hardware page size (4K) */ + devdata->max_sync_size = + (unsigned int)min(maxsynccop->comp_data_limit, + maxsynccop->decomp_data_limit); + + devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, + 65536); + + if (devdata->max_sync_size < 4096) { + dev_err(devdata->dev, "%s: hardware max data size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_size); + ret = -EINVAL; + goto out; + } + + nx842_pseries_constraints.maximum = devdata->max_sync_size; + + devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, + maxsynccop->decomp_sg_limit); + if (devdata->max_sync_sg < 1) { + dev_err(devdata->dev, "%s: hardware max sg size (%u) is " + "less than the driver minimum, unable to use " + "the hardware device\n", + __func__, devdata->max_sync_sg); + ret = -EINVAL; + goto out; + } + +out: + return ret; +} + +/** + * + * nx842_OF_upd -- Handle OF 
properties updates for the device. + * + * Set all properties from the OF tree. Optionally, a new property + * can be provided by the @new_prop pointer to overwrite an existing value. + * The device will remain disabled until all values are valid, this function + * will return an error for updates unless all values are valid. + * + * @new_prop: If not NULL, this property is being updated. If NULL, update + * all properties from the current values in the OF tree. + * + * Returns: + * 0 - Success + * -ENOMEM - Could not allocate memory for new devdata structure + * -EINVAL - property value not found, new_prop is not a recognized + * property for the device or property value is not valid. + * -ENODEV - Device is not available + */ +static int nx842_OF_upd(struct property *new_prop) +{ + struct nx842_devdata *old_devdata = NULL; + struct nx842_devdata *new_devdata = NULL; + struct device_node *of_node = NULL; + struct property *status = NULL; + struct property *maxsglen = NULL; + struct property *maxsyncop = NULL; + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + if (old_devdata) + of_node = old_devdata->dev->of_node; + + if (!old_devdata || !of_node) { + pr_err("%s: device is not available\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + return -ENODEV; + } + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_out; + } + + memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); + new_devdata->counters = old_devdata->counters; + + /* Set ptrs for existing properties */ + status = of_find_property(of_node, "status", NULL); + maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); + maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); + if (!status || !maxsglen || !maxsyncop) 
{ + dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); + ret = -EINVAL; + goto error_out; + } + + /* + * If this is a property update, there are only certain properties that + * we care about. Bail if it isn't in the below list + */ + if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || + strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) + goto out; + + /* Perform property updates */ + ret = nx842_OF_upd_status(new_devdata, status); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); + if (ret) + goto error_out; + + ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); + if (ret) + goto error_out; + +out: + dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", + __func__, new_devdata->max_sync_size, + old_devdata->max_sync_size); + dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", + __func__, new_devdata->max_sync_sg, + old_devdata->max_sync_sg); + dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", + __func__, new_devdata->max_sg_len, + old_devdata->max_sg_len); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + return 0; + +error_out: + if (new_devdata) { + dev_info(old_devdata->dev, "%s: device disabled\n", __func__); + nx842_OF_set_defaults(new_devdata); + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(new_devdata->dev, new_devdata); + kfree(old_devdata); + } else { + dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); + spin_unlock_irqrestore(&devdata_mutex, flags); + } + + if (!ret) + ret = -EINVAL; + return ret; +} + +/** + * nx842_OF_notifier - Process updates to OF properties for the device + * + * @np: notifier block 
+ * @action: notifier action + * @update: struct pSeries_reconfig_prop_update pointer if action is + * PSERIES_UPDATE_PROPERTY + * + * Returns: + * NOTIFY_OK on success + * NOTIFY_BAD encoded with error number on failure, use + * notifier_to_errno() to decode this value + */ +static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, + void *data) +{ + struct of_reconfig_data *upd = data; + struct nx842_devdata *local_devdata; + struct device_node *node = NULL; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (local_devdata) + node = local_devdata->dev->of_node; + + if (local_devdata && + action == OF_RECONFIG_UPDATE_PROPERTY && + !strcmp(upd->dn->name, node->name)) { + rcu_read_unlock(); + nx842_OF_upd(upd->prop); + } else + rcu_read_unlock(); + + return NOTIFY_OK; +} + +static struct notifier_block nx842_of_nb = { + .notifier_call = nx842_OF_notifier, +}; + +#define nx842_counter_read(_name) \ +static ssize_t nx842_##_name##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) { \ + struct nx842_devdata *local_devdata; \ + int p = 0; \ + rcu_read_lock(); \ + local_devdata = rcu_dereference(devdata); \ + if (local_devdata) \ + p = snprintf(buf, PAGE_SIZE, "%ld\n", \ + atomic64_read(&local_devdata->counters->_name)); \ + rcu_read_unlock(); \ + return p; \ +} + +#define NX842DEV_COUNTER_ATTR_RO(_name) \ + nx842_counter_read(_name); \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, \ + 0444, \ + nx842_##_name##_show,\ + NULL); + +NX842DEV_COUNTER_ATTR_RO(comp_complete); +NX842DEV_COUNTER_ATTR_RO(comp_failed); +NX842DEV_COUNTER_ATTR_RO(decomp_complete); +NX842DEV_COUNTER_ATTR_RO(decomp_failed); +NX842DEV_COUNTER_ATTR_RO(swdecomp); + +static ssize_t nx842_timehist_show(struct device *, + struct device_attribute *, char *); + +static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, + nx842_timehist_show, NULL); +static struct device_attribute dev_attr_decomp_times = 
__ATTR(decomp_times, + 0444, nx842_timehist_show, NULL); + +static ssize_t nx842_timehist_show(struct device *dev, + struct device_attribute *attr, char *buf) { + char *p = buf; + struct nx842_devdata *local_devdata; + atomic64_t *times; + int bytes_remain = PAGE_SIZE; + int bytes; + int i; + + rcu_read_lock(); + local_devdata = rcu_dereference(devdata); + if (!local_devdata) { + rcu_read_unlock(); + return 0; + } + + if (attr == &dev_attr_comp_times) + times = local_devdata->counters->comp_times; + else if (attr == &dev_attr_decomp_times) + times = local_devdata->counters->decomp_times; + else { + rcu_read_unlock(); + return 0; + } + + for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { + bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", + i ? (2<<(i-1)) : 0, (2<<i)-1, + atomic64_read(&times[i])); + bytes_remain -= bytes; + p += bytes; + } + /* The last bucket holds everything over + * 2<<(NX842_HIST_SLOTS - 2) us */ + bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", + 2<<(NX842_HIST_SLOTS - 2), + atomic64_read(&times[(NX842_HIST_SLOTS - 1)])); + p += bytes; + + rcu_read_unlock(); + return p - buf; +} + +static struct attribute *nx842_sysfs_entries[] = { + &dev_attr_comp_complete.attr, + &dev_attr_comp_failed.attr, + &dev_attr_decomp_complete.attr, + &dev_attr_decomp_failed.attr, + &dev_attr_swdecomp.attr, + &dev_attr_comp_times.attr, + &dev_attr_decomp_times.attr, + NULL, +}; + +static struct attribute_group nx842_attribute_group = { + .name = NULL, /* put in device directory */ + .attrs = nx842_sysfs_entries, +}; + +static struct nx842_driver nx842_pseries_driver = { + .owner = THIS_MODULE, + .constraints = &nx842_pseries_constraints, + .compress = nx842_pseries_compress, + .decompress = nx842_pseries_decompress, +}; + +static int __init nx842_probe(struct vio_dev *viodev, + const struct vio_device_id *id) +{ + struct nx842_devdata *old_devdata, *new_devdata = NULL; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = 
rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + + if (old_devdata && old_devdata->vdev != NULL) { + dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); + ret = -1; + goto error_unlock; + } + + dev_set_drvdata(&viodev->dev, NULL); + + new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); + if (!new_devdata) { + dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), + GFP_NOFS); + if (!new_devdata->counters) { + dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); + ret = -ENOMEM; + goto error_unlock; + } + + new_devdata->vdev = viodev; + new_devdata->dev = &viodev->dev; + nx842_OF_set_defaults(new_devdata); + + rcu_assign_pointer(devdata, new_devdata); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + kfree(old_devdata); + + of_reconfig_notifier_register(&nx842_of_nb); + + ret = nx842_OF_upd(NULL); + if (ret && ret != -ENODEV) { + dev_err(&viodev->dev, "could not parse device tree. 
%d\n", ret); + ret = -1; + goto error; + } + + rcu_read_lock(); + dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); + rcu_read_unlock(); + + if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { + dev_err(&viodev->dev, "could not create sysfs device attributes\n"); + ret = -1; + goto error; + } + + nx842_register_driver(&nx842_pseries_driver); + + return 0; + +error_unlock: + spin_unlock_irqrestore(&devdata_mutex, flags); + if (new_devdata) + kfree(new_devdata->counters); + kfree(new_devdata); +error: + return ret; +} + +static int __exit nx842_remove(struct vio_dev *viodev) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Removing IBM Power 842 compression device\n"); + sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); + + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + of_reconfig_notifier_unregister(&nx842_of_nb); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + dev_set_drvdata(&viodev->dev, NULL); + if (old_devdata) + kfree(old_devdata->counters); + kfree(old_devdata); + + nx842_unregister_driver(&nx842_pseries_driver); + + return 0; +} + +static struct vio_device_id nx842_vio_driver_ids[] = { + {NX842_PSERIES_COMPAT_NAME "-v1", NX842_PSERIES_COMPAT_NAME}, + {"", ""}, +}; + +static struct vio_driver nx842_vio_driver = { + .name = MODULE_NAME, + .probe = nx842_probe, + .remove = __exit_p(nx842_remove), + .get_desired_dma = nx842_get_desired_dma, + .id_table = nx842_vio_driver_ids, +}; + +static int __init nx842_init(void) +{ + struct nx842_devdata *new_devdata; + pr_info("Registering IBM Power 842 compression driver\n"); + + BUILD_BUG_ON(sizeof(struct nx842_workmem) > NX842_MEM_COMPRESS); + + RCU_INIT_POINTER(devdata, NULL); + new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); + if (!new_devdata) { + pr_err("Could not allocate memory for device data\n"); + 
return -ENOMEM; + } + new_devdata->status = UNAVAILABLE; + RCU_INIT_POINTER(devdata, new_devdata); + + return vio_register_driver(&nx842_vio_driver); +} + +module_init(nx842_init); + +static void __exit nx842_exit(void) +{ + struct nx842_devdata *old_devdata; + unsigned long flags; + + pr_info("Exiting IBM Power 842 compression driver\n"); + spin_lock_irqsave(&devdata_mutex, flags); + old_devdata = rcu_dereference_check(devdata, + lockdep_is_held(&devdata_mutex)); + RCU_INIT_POINTER(devdata, NULL); + spin_unlock_irqrestore(&devdata_mutex, flags); + synchronize_rcu(); + if (old_devdata && old_devdata->dev) + dev_set_drvdata(old_devdata->dev, NULL); + kfree(old_devdata); + nx842_unregister_driver(&nx842_pseries_driver); + vio_unregister_driver(&nx842_vio_driver); +} + +module_exit(nx842_exit); + diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 887196e..bf2823c 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -1,5 +1,10 @@ /* - * Driver for IBM Power 842 compression accelerator + * Driver frontend for IBM Power 842 compression accelerator + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * Designer of the Power data compression engine: + * Bulent Abali <abali@us.ibm.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -10,1594 +15,170 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
- * - * Copyright (C) IBM Corporation, 2012 - * - * Authors: Robert Jennings <rcj@linux.vnet.ibm.com> - * Seth Jennings <sjenning@linux.vnet.ibm.com> */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/nx842.h> -#include <linux/of.h> -#include <linux/slab.h> - -#include <asm/page.h> -#include <asm/vio.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include "nx_csbcpb.h" /* struct nx_csbcpb */ +#include "nx-842.h" #define MODULE_NAME "nx-compress" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors"); -#define SHIFT_4K 12 -#define SHIFT_64K 16 -#define SIZE_4K (1UL << SHIFT_4K) -#define SIZE_64K (1UL << SHIFT_64K) - -/* IO buffer must be 128 byte aligned */ -#define IO_BUFFER_ALIGN 128 - -struct nx842_header { - int blocks_nr; /* number of compressed blocks */ - int offset; /* offset of the first block (from beginning of header) */ - int sizes[0]; /* size of compressed blocks */ -}; - -static inline int nx842_header_size(const struct nx842_header *hdr) -{ - return sizeof(struct nx842_header) + - hdr->blocks_nr * sizeof(hdr->sizes[0]); -} - -/* Macros for fields within nx_csbcpb */ -/* Check the valid bit within the csbcpb valid field */ -#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7)) - -/* CE macros operate on the completion_extension field bits in the csbcpb. 
- * CE0 0=full completion, 1=partial completion - * CE1 0=CE0 indicates completion, 1=termination (output may be modified) - * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */ -#define NX842_CSBCPB_CE0(x) (x & BIT_MASK(7)) -#define NX842_CSBCPB_CE1(x) (x & BIT_MASK(6)) -#define NX842_CSBCPB_CE2(x) (x & BIT_MASK(5)) - -/* The NX unit accepts data only on 4K page boundaries */ -#define NX842_HW_PAGE_SHIFT SHIFT_4K -#define NX842_HW_PAGE_SIZE (ASM_CONST(1) << NX842_HW_PAGE_SHIFT) -#define NX842_HW_PAGE_MASK (~(NX842_HW_PAGE_SIZE-1)) - -enum nx842_status { - UNAVAILABLE, - AVAILABLE -}; - -struct ibm_nx842_counters { - atomic64_t comp_complete; - atomic64_t comp_failed; - atomic64_t decomp_complete; - atomic64_t decomp_failed; - atomic64_t swdecomp; - atomic64_t comp_times[32]; - atomic64_t decomp_times[32]; -}; - -static struct nx842_devdata { - struct vio_dev *vdev; - struct device *dev; - struct ibm_nx842_counters *counters; - unsigned int max_sg_len; - unsigned int max_sync_size; - unsigned int max_sync_sg; - enum nx842_status status; -} __rcu *devdata; -static DEFINE_SPINLOCK(devdata_mutex); - -#define NX842_COUNTER_INC(_x) \ -static inline void nx842_inc_##_x( \ - const struct nx842_devdata *dev) { \ - if (dev) \ - atomic64_inc(&dev->counters->_x); \ -} -NX842_COUNTER_INC(comp_complete); -NX842_COUNTER_INC(comp_failed); -NX842_COUNTER_INC(decomp_complete); -NX842_COUNTER_INC(decomp_failed); -NX842_COUNTER_INC(swdecomp); - -#define NX842_HIST_SLOTS 16 - -static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time) -{ - int bucket = fls(time); - - if (bucket) - bucket = min((NX842_HIST_SLOTS - 1), bucket - 1); - - atomic64_inc(&times[bucket]); -} - -/* NX unit operation flags */ -#define NX842_OP_COMPRESS 0x0 -#define NX842_OP_CRC 0x1 -#define NX842_OP_DECOMPRESS 0x2 -#define NX842_OP_COMPRESS_CRC (NX842_OP_COMPRESS | NX842_OP_CRC) -#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC) -#define NX842_OP_ASYNC (1<<23) 
-#define NX842_OP_NOTIFY (1<<22) -#define NX842_OP_NOTIFY_INT(x) ((x & 0xff)<<8) +/* Only one driver is expected, based on the HW platform */ +static struct nx842_driver *nx842_driver; +static DEFINE_SPINLOCK(nx842_driver_lock); /* protects driver pointers */ -static unsigned long nx842_get_desired_dma(struct vio_dev *viodev) +void nx842_register_driver(struct nx842_driver *driver) { - /* No use of DMA mappings within the driver. */ - return 0; -} - -struct nx842_slentry { - unsigned long ptr; /* Real address (use __pa()) */ - unsigned long len; -}; - -/* pHyp scatterlist entry */ -struct nx842_scatterlist { - int entry_nr; /* number of slentries */ - struct nx842_slentry *entries; /* ptr to array of slentries */ -}; + spin_lock(&nx842_driver_lock); -/* Does not include sizeof(entry_nr) in the size */ -static inline unsigned long nx842_get_scatterlist_size( - struct nx842_scatterlist *sl) -{ - return sl->entry_nr * sizeof(struct nx842_slentry); -} - -static inline unsigned long nx842_get_pa(void *addr) -{ - if (is_vmalloc_addr(addr)) - return page_to_phys(vmalloc_to_page(addr)) - + offset_in_page(addr); - else - return __pa(addr); -} - -static int nx842_build_scatterlist(unsigned long buf, int len, - struct nx842_scatterlist *sl) -{ - unsigned long nextpage; - struct nx842_slentry *entry; - - sl->entry_nr = 0; - - entry = sl->entries; - while (len) { - entry->ptr = nx842_get_pa((void *)buf); - nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE); - if (nextpage < buf + len) { - /* we aren't at the end yet */ - if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE)) - /* we are in the middle (or beginning) */ - entry->len = NX842_HW_PAGE_SIZE; - else - /* we are at the beginning */ - entry->len = nextpage - buf; - } else { - /* at the end */ - entry->len = len; - } - - len -= entry->len; - buf += entry->len; - sl->entry_nr++; - entry++; + if (nx842_driver) { + pr_err("can't register driver %s, already using driver %s\n", + driver->owner->name, nx842_driver->owner->name); + } else { + 
pr_info("registering driver %s\n", driver->owner->name); + nx842_driver = driver; } - return 0; + spin_unlock(&nx842_driver_lock); } +EXPORT_SYMBOL_GPL(nx842_register_driver); -/* - * Working memory for software decompression - */ -struct sw842_fifo { - union { - char f8[256][8]; - char f4[512][4]; - }; - char f2[256][2]; - unsigned char f84_full; - unsigned char f2_full; - unsigned char f8_count; - unsigned char f2_count; - unsigned int f4_count; -}; - -/* - * Working memory for crypto API - */ -struct nx842_workmem { - char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */ - union { - /* hardware working memory */ - struct { - /* scatterlist */ - char slin[SIZE_4K]; - char slout[SIZE_4K]; - /* coprocessor status/parameter block */ - struct nx_csbcpb csbcpb; - }; - /* software working memory */ - struct sw842_fifo swfifo; /* software decompression fifo */ - }; -}; - -int nx842_get_workmem_size(void) +void nx842_unregister_driver(struct nx842_driver *driver) { - return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE; -} -EXPORT_SYMBOL_GPL(nx842_get_workmem_size); - -int nx842_get_workmem_size_aligned(void) -{ - return sizeof(struct nx842_workmem); -} -EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned); + spin_lock(&nx842_driver_lock); -static int nx842_validate_result(struct device *dev, - struct cop_status_block *csb) -{ - /* The csb must be valid after returning from vio_h_cop_sync */ - if (!NX842_CSBCBP_VALID_CHK(csb->valid)) { - dev_err(dev, "%s: cspcbp not valid upon completion.\n", - __func__); - dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n", - csb->valid, - csb->crb_seq_number, - csb->completion_code, - csb->completion_extension); - dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n", - csb->processed_byte_count, - (unsigned long)csb->address); - return -EIO; - } - - /* Check return values from the hardware in the CSB */ - switch (csb->completion_code) { - case 0: /* Completed without error */ - break; - case 64: /* Target bytes 
> Source bytes during compression */ - case 13: /* Output buffer too small */ - dev_dbg(dev, "%s: Compression output larger than input\n", - __func__); - return -ENOSPC; - case 66: /* Input data contains an illegal template field */ - case 67: /* Template indicates data past the end of the input stream */ - dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n", - __func__, csb->completion_code); - return -EINVAL; - default: - dev_dbg(dev, "%s: Unspecified error (code:%d)\n", - __func__, csb->completion_code); - return -EIO; - } - - /* Hardware sanity check */ - if (!NX842_CSBCPB_CE2(csb->completion_extension)) { - dev_err(dev, "%s: No error returned by hardware, but " - "data returned is unusable, contact support.\n" - "(Additional info: csbcbp->processed bytes " - "does not specify processed bytes for the " - "target buffer.)\n", __func__); - return -EIO; + if (nx842_driver == driver) { + pr_info("unregistering driver %s\n", driver->owner->name); + nx842_driver = NULL; + } else if (nx842_driver) { + pr_err("can't unregister driver %s, using driver %s\n", + driver->owner->name, nx842_driver->owner->name); + } else { + pr_err("can't unregister driver %s, no driver in use\n", + driver->owner->name); } - return 0; + spin_unlock(&nx842_driver_lock); } +EXPORT_SYMBOL_GPL(nx842_unregister_driver); -/** - * nx842_compress - Compress data using the 842 algorithm - * - * Compression provide by the NX842 coprocessor on IBM Power systems. - * The input buffer is compressed and the result is stored in the - * provided output buffer. - * - * Upon return from this function @outlen contains the length of the - * compressed data. If there is an error then @outlen will be 0 and an - * error will be specified by the return code from this function. 
- * - * @in: Pointer to input buffer, must be page aligned - * @inlen: Length of input buffer, must be PAGE_SIZE - * @out: Pointer to output buffer - * @outlen: Length of output buffer - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_get_workmem_size() - * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EMSGSIZE XXX Difficult to describe this limitation - * -EIO Internal error - * -ENODEV Hardware unavailable - */ -int nx842_compress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, void *wmem) +static struct nx842_driver *get_driver(void) { - struct nx842_header *hdr; - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0, max_sync_size, i, bytesleft, size, hdrsize; - unsigned long inbuf, outbuf, padding; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start_time = get_tb(); - - /* - * Make sure input buffer is 64k page aligned. This is assumed since - * this driver is designed for page compression only (for now). This - * is very nice since we can now use direct DDE(s) for the input and - * the alignment is guaranteed. 
- */ - inbuf = (unsigned long)in; - if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata || !local_devdata->dev) { - rcu_read_unlock(); - return -ENODEV; - } - max_sync_size = local_devdata->max_sync_size; - dev = local_devdata->dev; - - /* Create the header */ - hdr = (struct nx842_header *)out; - hdr->blocks_nr = PAGE_SIZE / max_sync_size; - hdrsize = nx842_header_size(hdr); - outbuf = (unsigned long)out + hdrsize; - bytesleft = *outlen - hdrsize; - - /* Init scatterlist */ - workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, - NX842_HW_PAGE_SIZE); - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = (struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_COMPRESS; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - op.out = nx842_get_pa(slout.entries); + struct nx842_driver *driver = NULL; - for (i = 0; i < hdr->blocks_nr; i++) { - /* - * Aligning the output blocks to 128 bytes does waste space, - * but it prevents the need for bounce buffers and memory - * copies. It also simplifies the code a lot. In the worst - * case (64k page, 4k max_sync_size), you lose up to - * (128*16)/64k = ~3% the compression factor. For 64k - * max_sync_size, the loss would be at most 128/64k = ~0.2%. - */ - padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf; - outbuf += padding; - bytesleft -= padding; - if (i == 0) - /* save offset into first block in header */ - hdr->offset = padding + hdrsize; + spin_lock(&nx842_driver_lock); - if (bytesleft <= 0) { - ret = -ENOSPC; - goto unlock; - } + driver = nx842_driver; - /* - * NOTE: If the default max_sync_size is changed from 4k - * to 64k, remove the "likely" case below, since a - * scatterlist will always be needed. 
- */ - if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = max_sync_size; + if (driver && !try_module_get(driver->owner)) + driver = NULL; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, max_sync_size, &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } + spin_unlock(&nx842_driver_lock); - /* - * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect - * DDE is required for the outbuf. - * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must - * also be page aligned (1 in 128/4k=32 chance) in order - * to use a direct DDE. - * This is unlikely, just use an indirect DDE always. - */ - nx842_build_scatterlist(outbuf, - min(bytesleft, max_sync_size), &slout); - /* op.out set before loop */ - op.outlen = -nx842_get_scatterlist_size(&slout); - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - ret = -EIO; - goto unlock; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret && ret != -ENOSPC) - goto unlock; - - /* Handle incompressible data */ - if (unlikely(ret == -ENOSPC)) { - if (bytesleft < max_sync_size) { - /* - * Not enough space left in the output buffer - * to store uncompressed block - */ - goto unlock; - } else { - /* Store incompressible block */ - memcpy((void *)outbuf, (void *)inbuf, - max_sync_size); - hdr->sizes[i] = -max_sync_size; - outbuf += max_sync_size; - bytesleft -= max_sync_size; - /* Reset ret, incompressible data handled */ - ret = 0; - } - } else { - /* Normal case, compression was successful */ - size = csbcpb->csb.processed_byte_count; - dev_dbg(dev, "%s: processed_bytes=%d\n", - __func__, size); - hdr->sizes[i] = size; - outbuf += size; - bytesleft -= size; - } - - inbuf += 
max_sync_size; - } - - *outlen = (unsigned int)(outbuf - (unsigned long)out); - -unlock: - if (ret) - nx842_inc_comp_failed(local_devdata); - else { - nx842_inc_comp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->comp_times, - (get_tb() - start_time) / tb_ticks_per_usec); - } - rcu_read_unlock(); - return ret; + return driver; } -EXPORT_SYMBOL_GPL(nx842_compress); - -static int sw842_decompress(const unsigned char *, int, unsigned char *, int *, - const void *); -/** - * nx842_decompress - Decompress data using the 842 algorithm - * - * Decompression provide by the NX842 coprocessor on IBM Power systems. - * The input buffer is decompressed and the result is stored in the - * provided output buffer. The size allocated to the output buffer is - * provided by the caller of this function in @outlen. Upon return from - * this function @outlen contains the length of the decompressed data. - * If there is an error then @outlen will be 0 and an error will be - * specified by the return code from this function. 
- * - * @in: Pointer to input buffer, will use bounce buffer if not 128 byte - * aligned - * @inlen: Length of input buffer - * @out: Pointer to output buffer, must be page aligned - * @outlen: Length of output buffer, must be PAGE_SIZE - * @wrkmem: ptr to buffer for working memory, size determined by - * nx842_get_workmem_size() - * - * Returns: - * 0 Success, output of length @outlen stored in the buffer at @out - * -ENODEV Hardware decompression device is unavailable - * -ENOMEM Unable to allocate internal buffers - * -ENOSPC Output buffer is to small - * -EINVAL Bad input data encountered when attempting decompress - * -EIO Internal error - */ -int nx842_decompress(const unsigned char *in, unsigned int inlen, - unsigned char *out, unsigned int *outlen, void *wmem) +static void put_driver(struct nx842_driver *driver) { - struct nx842_header *hdr; - struct nx842_devdata *local_devdata; - struct device *dev = NULL; - struct nx842_workmem *workmem; - struct nx842_scatterlist slin, slout; - struct nx_csbcpb *csbcpb; - int ret = 0, i, size, max_sync_size; - unsigned long inbuf, outbuf; - struct vio_pfo_op op = { - .done = NULL, - .handle = 0, - .timeout = 0, - }; - unsigned long start_time = get_tb(); - - /* Ensure page alignment and size */ - outbuf = (unsigned long)out; - if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE) - return -EINVAL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (local_devdata) - dev = local_devdata->dev; - - /* Get header */ - hdr = (struct nx842_header *)in; - - workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem, - NX842_HW_PAGE_SIZE); - - inbuf = (unsigned long)in + hdr->offset; - if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) { - /* Copy block(s) into bounce buffer for alignment */ - memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset); - inbuf = (unsigned long)workmem->bounce; - } - - /* Init scatterlist */ - slin.entries = (struct nx842_slentry *)workmem->slin; - slout.entries = 
(struct nx842_slentry *)workmem->slout; - - /* Init operation */ - op.flags = NX842_OP_DECOMPRESS; - csbcpb = &workmem->csbcpb; - memset(csbcpb, 0, sizeof(*csbcpb)); - op.csbcpb = nx842_get_pa(csbcpb); - - /* - * max_sync_size may have changed since compression, - * so we can't read it from the device info. We need - * to derive it from hdr->blocks_nr. - */ - max_sync_size = PAGE_SIZE / hdr->blocks_nr; - - for (i = 0; i < hdr->blocks_nr; i++) { - /* Skip padding */ - inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN); - - if (hdr->sizes[i] < 0) { - /* Negative sizes indicate uncompressed data blocks */ - size = abs(hdr->sizes[i]); - memcpy((void *)outbuf, (void *)inbuf, size); - outbuf += size; - inbuf += size; - continue; - } - - if (!dev) - goto sw; - - /* - * The better the compression, the more likely the "likely" - * case becomes. - */ - if (likely((inbuf & NX842_HW_PAGE_MASK) == - ((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) { - /* Create direct DDE */ - op.in = nx842_get_pa((void *)inbuf); - op.inlen = hdr->sizes[i]; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin); - op.in = nx842_get_pa(slin.entries); - op.inlen = -nx842_get_scatterlist_size(&slin); - } - - /* - * NOTE: If the default max_sync_size is changed from 4k - * to 64k, remove the "likely" case below, since a - * scatterlist will always be needed. 
- */ - if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) { - /* Create direct DDE */ - op.out = nx842_get_pa((void *)outbuf); - op.outlen = max_sync_size; - } else { - /* Create indirect DDE (scatterlist) */ - nx842_build_scatterlist(outbuf, max_sync_size, &slout); - op.out = nx842_get_pa(slout.entries); - op.outlen = -nx842_get_scatterlist_size(&slout); - } - - /* Send request to pHyp */ - ret = vio_h_cop_sync(local_devdata->vdev, &op); - - /* Check for pHyp error */ - if (ret) { - dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n", - __func__, ret, op.hcall_err); - dev = NULL; - goto sw; - } - - /* Check for hardware error */ - ret = nx842_validate_result(dev, &csbcpb->csb); - if (ret) { - dev = NULL; - goto sw; - } - - /* HW decompression success */ - inbuf += hdr->sizes[i]; - outbuf += csbcpb->csb.processed_byte_count; - continue; - -sw: - /* software decompression */ - size = max_sync_size; - ret = sw842_decompress( - (unsigned char *)inbuf, hdr->sizes[i], - (unsigned char *)outbuf, &size, wmem); - if (ret) - pr_debug("%s: sw842_decompress failed with %d\n", - __func__, ret); - - if (ret) { - if (ret != -ENOSPC && ret != -EINVAL && - ret != -EMSGSIZE) - ret = -EIO; - goto unlock; - } - - /* SW decompression success */ - inbuf += hdr->sizes[i]; - outbuf += size; - } - - *outlen = (unsigned int)(outbuf - (unsigned long)out); - -unlock: - if (ret) - /* decompress fail */ - nx842_inc_decomp_failed(local_devdata); - else { - if (!dev) - /* software decompress */ - nx842_inc_swdecomp(local_devdata); - nx842_inc_decomp_complete(local_devdata); - ibm_nx842_incr_hist(local_devdata->counters->decomp_times, - (get_tb() - start_time) / tb_ticks_per_usec); - } - - rcu_read_unlock(); - return ret; + module_put(driver->owner); } -EXPORT_SYMBOL_GPL(nx842_decompress); /** - * nx842_OF_set_defaults -- Set default (disabled) values for devdata - * - * @devdata - struct nx842_devdata to update - * - * Returns: - * 0 on success - * -ENOENT if @devdata ptr is NULL + * 
nx842_constraints + * + * This provides the driver's constraints. Different nx842 implementations + * may have varying requirements. The constraints are: + * @alignment: All buffers should be aligned to this + * @multiple: All buffer lengths should be a multiple of this + * @minimum: Buffer lengths must not be less than this amount + * @maximum: Buffer lengths must not be more than this amount + * + * The constraints apply to all buffers and lengths, both input and output, + * for both compression and decompression, except for the minimum which + * only applies to compression input and decompression output; the + * compressed data can be less than the minimum constraint. It can be + * assumed that compressed data will always adhere to the multiple + * constraint. + * + * The driver may succeed even if these constraints are violated; + * however the driver can return failure or suffer reduced performance + * if any constraint is not met. */ -static int nx842_OF_set_defaults(struct nx842_devdata *devdata) +int nx842_constraints(struct nx842_constraints *c) { - if (devdata) { - devdata->max_sync_size = 0; - devdata->max_sync_sg = 0; - devdata->max_sg_len = 0; - devdata->status = UNAVAILABLE; - return 0; - } else - return -ENOENT; -} - -/** - * nx842_OF_upd_status -- Update the device info from OF status prop - * - * The status property indicates if the accelerator is enabled. If the - * device is in the OF tree it indicates that the hardware is present. - * The status field indicates if the device is enabled when the status - * is 'okay'. Otherwise the device driver will be disabled. 
- * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 - Device is available - * -EINVAL - Device is not available - */ -static int nx842_OF_upd_status(struct nx842_devdata *devdata, - struct property *prop) { + struct nx842_driver *driver = get_driver(); int ret = 0; - const char *status = (const char *)prop->value; - - if (!strncmp(status, "okay", (size_t)prop->length)) { - devdata->status = AVAILABLE; - } else { - dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n", - __func__, status); - devdata->status = UNAVAILABLE; - } - - return ret; -} - -/** - * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop - * - * Definition of the 'ibm,max-sg-len' OF property: - * This field indicates the maximum byte length of a scatter list - * for the platform facility. It is a single cell encoded as with encode-int. - * - * Example: - * # od -x ibm,max-sg-len - * 0000000 0000 0ff0 - * - * In this example, the maximum byte length of a scatter list is - * 0x0ff0 (4,080). - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const int *maxsglen = prop->value; - - if (prop->length != sizeof(*maxsglen)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__, - prop->length, sizeof(*maxsglen)); - ret = -EINVAL; - } else { - devdata->max_sg_len = (unsigned int)min(*maxsglen, - (int)NX842_HW_PAGE_SIZE); - } - return ret; -} - -/** - * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop - * - * Definition of the 'ibm,max-sync-cop' OF property: - * Two series of cells. 
The first series of cells represents the maximums - * that can be synchronously compressed. The second series of cells - * represents the maximums that can be synchronously decompressed. - * 1. The first cell in each series contains the count of the number of - * data length, scatter list elements pairs that follow – each being - * of the form - * a. One cell data byte length - * b. One cell total number of scatter list elements - * - * Example: - * # od -x ibm,max-sync-cop - * 0000000 0000 0001 0000 1000 0000 01fe 0000 0001 - * 0000020 0000 1000 0000 01fe - * - * In this example, compression supports 0x1000 (4,096) data byte length - * and 0x1fe (510) total scatter list elements. Decompression supports - * 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list - * elements. - * - * @devdata - struct nx842_devdata to update - * @prop - struct property point containing the maxsyncop for the update - * - * Returns: - * 0 on success - * -EINVAL on failure - */ -static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata, - struct property *prop) { - int ret = 0; - const struct maxsynccop_t { - int comp_elements; - int comp_data_limit; - int comp_sg_limit; - int decomp_elements; - int decomp_data_limit; - int decomp_sg_limit; - } *maxsynccop; - - if (prop->length != sizeof(*maxsynccop)) { - dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__); - dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length, - sizeof(*maxsynccop)); - ret = -EINVAL; - goto out; - } - - maxsynccop = (const struct maxsynccop_t *)prop->value; - - /* Use one limit rather than separate limits for compression and - * decompression. 
Set a maximum for this so as not to exceed the - * size that the header can support and round the value down to - * the hardware page size (4K) */ - devdata->max_sync_size = - (unsigned int)min(maxsynccop->comp_data_limit, - maxsynccop->decomp_data_limit); + if (!driver) + return -ENODEV; - devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size, - SIZE_64K); + BUG_ON(!c); + memcpy(c, driver->constraints, sizeof(*c)); - if (devdata->max_sync_size < SIZE_4K) { - dev_err(devdata->dev, "%s: hardware max data size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_size); - ret = -EINVAL; - goto out; - } + put_driver(driver); - devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit, - maxsynccop->decomp_sg_limit); - if (devdata->max_sync_sg < 1) { - dev_err(devdata->dev, "%s: hardware max sg size (%u) is " - "less than the driver minimum, unable to use " - "the hardware device\n", - __func__, devdata->max_sync_sg); - ret = -EINVAL; - goto out; - } - -out: return ret; } +EXPORT_SYMBOL_GPL(nx842_constraints); -/** - * - * nx842_OF_upd -- Handle OF properties updates for the device. - * - * Set all properties from the OF tree. Optionally, a new property - * can be provided by the @new_prop pointer to overwrite an existing value. - * The device will remain disabled until all values are valid, this function - * will return an error for updates unless all values are valid. - * - * @new_prop: If not NULL, this property is being updated. If NULL, update - * all properties from the current values in the OF tree. - * - * Returns: - * 0 - Success - * -ENOMEM - Could not allocate memory for new devdata structure - * -EINVAL - property value not found, new_prop is not a recognized - * property for the device or property value is not valid. 
- * -ENODEV - Device is not available - */ -static int nx842_OF_upd(struct property *new_prop) +int nx842_compress(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem) { - struct nx842_devdata *old_devdata = NULL; - struct nx842_devdata *new_devdata = NULL; - struct device_node *of_node = NULL; - struct property *status = NULL; - struct property *maxsglen = NULL; - struct property *maxsyncop = NULL; - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - if (old_devdata) - of_node = old_devdata->dev->of_node; + struct nx842_driver *driver = get_driver(); + int ret; - if (!old_devdata || !of_node) { - pr_err("%s: device is not available\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); + if (!driver) return -ENODEV; - } - - new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); - if (!new_devdata) { - dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__); - ret = -ENOMEM; - goto error_out; - } - memcpy(new_devdata, old_devdata, sizeof(*old_devdata)); - new_devdata->counters = old_devdata->counters; + ret = driver->compress(in, in_len, out, out_len, wrkmem); - /* Set ptrs for existing properties */ - status = of_find_property(of_node, "status", NULL); - maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL); - maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL); - if (!status || !maxsglen || !maxsyncop) { - dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__); - ret = -EINVAL; - goto error_out; - } - - /* - * If this is a property update, there are only certain properties that - * we care about. 
Bail if it isn't in the below list - */ - if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) || - strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length))) - goto out; - - /* Perform property updates */ - ret = nx842_OF_upd_status(new_devdata, status); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen); - if (ret) - goto error_out; - - ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop); - if (ret) - goto error_out; - -out: - dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n", - __func__, new_devdata->max_sync_size, - old_devdata->max_sync_size); - dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n", - __func__, new_devdata->max_sync_sg, - old_devdata->max_sync_sg); - dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n", - __func__, new_devdata->max_sg_len, - old_devdata->max_sg_len); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - return 0; - -error_out: - if (new_devdata) { - dev_info(old_devdata->dev, "%s: device disabled\n", __func__); - nx842_OF_set_defaults(new_devdata); - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(new_devdata->dev, new_devdata); - kfree(old_devdata); - } else { - dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__); - spin_unlock_irqrestore(&devdata_mutex, flags); - } + put_driver(driver); - if (!ret) - ret = -EINVAL; return ret; } +EXPORT_SYMBOL_GPL(nx842_compress); -/** - * nx842_OF_notifier - Process updates to OF properties for the device - * - * @np: notifier block - * @action: notifier action - * @update: struct pSeries_reconfig_prop_update pointer if action is - * PSERIES_UPDATE_PROPERTY - * - * Returns: - * NOTIFY_OK on success - * 
NOTIFY_BAD encoded with error number on failure, use - * notifier_to_errno() to decode this value - */ -static int nx842_OF_notifier(struct notifier_block *np, unsigned long action, - void *data) -{ - struct of_reconfig_data *upd = data; - struct nx842_devdata *local_devdata; - struct device_node *node = NULL; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (local_devdata) - node = local_devdata->dev->of_node; - - if (local_devdata && - action == OF_RECONFIG_UPDATE_PROPERTY && - !strcmp(upd->dn->name, node->name)) { - rcu_read_unlock(); - nx842_OF_upd(upd->prop); - } else - rcu_read_unlock(); - - return NOTIFY_OK; -} - -static struct notifier_block nx842_of_nb = { - .notifier_call = nx842_OF_notifier, -}; - -#define nx842_counter_read(_name) \ -static ssize_t nx842_##_name##_show(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) { \ - struct nx842_devdata *local_devdata; \ - int p = 0; \ - rcu_read_lock(); \ - local_devdata = rcu_dereference(devdata); \ - if (local_devdata) \ - p = snprintf(buf, PAGE_SIZE, "%ld\n", \ - atomic64_read(&local_devdata->counters->_name)); \ - rcu_read_unlock(); \ - return p; \ -} - -#define NX842DEV_COUNTER_ATTR_RO(_name) \ - nx842_counter_read(_name); \ - static struct device_attribute dev_attr_##_name = __ATTR(_name, \ - 0444, \ - nx842_##_name##_show,\ - NULL); - -NX842DEV_COUNTER_ATTR_RO(comp_complete); -NX842DEV_COUNTER_ATTR_RO(comp_failed); -NX842DEV_COUNTER_ATTR_RO(decomp_complete); -NX842DEV_COUNTER_ATTR_RO(decomp_failed); -NX842DEV_COUNTER_ATTR_RO(swdecomp); - -static ssize_t nx842_timehist_show(struct device *, - struct device_attribute *, char *); - -static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444, - nx842_timehist_show, NULL); -static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times, - 0444, nx842_timehist_show, NULL); - -static ssize_t nx842_timehist_show(struct device *dev, - struct device_attribute *attr, char *buf) { - char *p = 
buf; - struct nx842_devdata *local_devdata; - atomic64_t *times; - int bytes_remain = PAGE_SIZE; - int bytes; - int i; - - rcu_read_lock(); - local_devdata = rcu_dereference(devdata); - if (!local_devdata) { - rcu_read_unlock(); - return 0; - } - - if (attr == &dev_attr_comp_times) - times = local_devdata->counters->comp_times; - else if (attr == &dev_attr_decomp_times) - times = local_devdata->counters->decomp_times; - else { - rcu_read_unlock(); - return 0; - } - - for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) { - bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n", - i ? (2<<(i-1)) : 0, (2<<i)-1, - atomic64_read(&times[i])); - bytes_remain -= bytes; - p += bytes; - } - /* The last bucket holds everything over - * 2<<(NX842_HIST_SLOTS - 2) us */ - bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n", - 2<<(NX842_HIST_SLOTS - 2), - atomic64_read(&times[(NX842_HIST_SLOTS - 1)])); - p += bytes; - - rcu_read_unlock(); - return p - buf; -} - -static struct attribute *nx842_sysfs_entries[] = { - &dev_attr_comp_complete.attr, - &dev_attr_comp_failed.attr, - &dev_attr_decomp_complete.attr, - &dev_attr_decomp_failed.attr, - &dev_attr_swdecomp.attr, - &dev_attr_comp_times.attr, - &dev_attr_decomp_times.attr, - NULL, -}; - -static struct attribute_group nx842_attribute_group = { - .name = NULL, /* put in device directory */ - .attrs = nx842_sysfs_entries, -}; - -static int __init nx842_probe(struct vio_dev *viodev, - const struct vio_device_id *id) +int nx842_decompress(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem) { - struct nx842_devdata *old_devdata, *new_devdata = NULL; - unsigned long flags; - int ret = 0; - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - - if (old_devdata && old_devdata->vdev != NULL) { - dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__); - ret = -1; - goto error_unlock; - } 
- - dev_set_drvdata(&viodev->dev, NULL); - - new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS); - if (!new_devdata) { - dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__); - ret = -ENOMEM; - goto error_unlock; - } - - new_devdata->counters = kzalloc(sizeof(*new_devdata->counters), - GFP_NOFS); - if (!new_devdata->counters) { - dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__); - ret = -ENOMEM; - goto error_unlock; - } - - new_devdata->vdev = viodev; - new_devdata->dev = &viodev->dev; - nx842_OF_set_defaults(new_devdata); - - rcu_assign_pointer(devdata, new_devdata); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - kfree(old_devdata); - - of_reconfig_notifier_register(&nx842_of_nb); - - ret = nx842_OF_upd(NULL); - if (ret && ret != -ENODEV) { - dev_err(&viodev->dev, "could not parse device tree. %d\n", ret); - ret = -1; - goto error; - } + struct nx842_driver *driver = get_driver(); + int ret; - rcu_read_lock(); - dev_set_drvdata(&viodev->dev, rcu_dereference(devdata)); - rcu_read_unlock(); + if (!driver) + return -ENODEV; - if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) { - dev_err(&viodev->dev, "could not create sysfs device attributes\n"); - ret = -1; - goto error; - } + ret = driver->decompress(in, in_len, out, out_len, wrkmem); - return 0; + put_driver(driver); -error_unlock: - spin_unlock_irqrestore(&devdata_mutex, flags); - if (new_devdata) - kfree(new_devdata->counters); - kfree(new_devdata); -error: return ret; } +EXPORT_SYMBOL_GPL(nx842_decompress); -static int __exit nx842_remove(struct vio_dev *viodev) +static __init int nx842_init(void) { - struct nx842_devdata *old_devdata; - unsigned long flags; - - pr_info("Removing IBM Power 842 compression device\n"); - sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group); - - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - 
lockdep_is_held(&devdata_mutex)); - of_reconfig_notifier_unregister(&nx842_of_nb); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - dev_set_drvdata(&viodev->dev, NULL); - if (old_devdata) - kfree(old_devdata->counters); - kfree(old_devdata); - return 0; -} - -static struct vio_device_id nx842_driver_ids[] = { - {"ibm,compression-v1", "ibm,compression"}, - {"", ""}, -}; - -static struct vio_driver nx842_driver = { - .name = MODULE_NAME, - .probe = nx842_probe, - .remove = __exit_p(nx842_remove), - .get_desired_dma = nx842_get_desired_dma, - .id_table = nx842_driver_ids, -}; + pr_info("loading\n"); -static int __init nx842_init(void) -{ - struct nx842_devdata *new_devdata; - pr_info("Registering IBM Power 842 compression driver\n"); + if (of_find_compatible_node(NULL, NULL, NX842_POWERNV_COMPAT_NAME)) + request_module_nowait(NX842_POWERNV_MODULE_NAME); + else if (of_find_compatible_node(NULL, NULL, NX842_PSERIES_COMPAT_NAME)) + request_module_nowait(NX842_PSERIES_MODULE_NAME); + else + pr_err("no nx842 driver found.\n"); - RCU_INIT_POINTER(devdata, NULL); - new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL); - if (!new_devdata) { - pr_err("Could not allocate memory for device data\n"); - return -ENOMEM; - } - new_devdata->status = UNAVAILABLE; - RCU_INIT_POINTER(devdata, new_devdata); + pr_info("loaded\n"); - return vio_register_driver(&nx842_driver); + return 0; } - module_init(nx842_init); static void __exit nx842_exit(void) { - struct nx842_devdata *old_devdata; - unsigned long flags; - - pr_info("Exiting IBM Power 842 compression driver\n"); - spin_lock_irqsave(&devdata_mutex, flags); - old_devdata = rcu_dereference_check(devdata, - lockdep_is_held(&devdata_mutex)); - RCU_INIT_POINTER(devdata, NULL); - spin_unlock_irqrestore(&devdata_mutex, flags); - synchronize_rcu(); - if (old_devdata) - dev_set_drvdata(old_devdata->dev, NULL); - kfree(old_devdata); - vio_unregister_driver(&nx842_driver); + 
pr_info("NX842 unloaded\n"); } - module_exit(nx842_exit); - -/********************************* - * 842 software decompressor -*********************************/ -typedef int (*sw842_template_op)(const char **, int *, unsigned char **, - struct sw842_fifo *); - -static int sw842_data8(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_data4(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_data2(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr8(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr4(const char **, int *, unsigned char **, - struct sw842_fifo *); -static int sw842_ptr2(const char **, int *, unsigned char **, - struct sw842_fifo *); - -/* special templates */ -#define SW842_TMPL_REPEAT 0x1B -#define SW842_TMPL_ZEROS 0x1C -#define SW842_TMPL_EOF 0x1E - -static sw842_template_op sw842_tmpl_ops[26][4] = { - { sw842_data8, NULL}, /* 0 (00000) */ - { sw842_data4, sw842_data2, sw842_ptr2, NULL}, - { sw842_data4, sw842_ptr2, sw842_data2, NULL}, - { sw842_data4, sw842_ptr2, sw842_ptr2, NULL}, - { sw842_data4, sw842_ptr4, NULL}, - { sw842_data2, sw842_ptr2, sw842_data4, NULL}, - { sw842_data2, sw842_ptr2, sw842_data2, sw842_ptr2}, - { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_data2}, - { sw842_data2, sw842_ptr2, sw842_ptr2, sw842_ptr2,}, - { sw842_data2, sw842_ptr2, sw842_ptr4, NULL}, - { sw842_ptr2, sw842_data2, sw842_data4, NULL}, /* 10 (01010) */ - { sw842_ptr2, sw842_data4, sw842_ptr2, NULL}, - { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_data2}, - { sw842_ptr2, sw842_data2, sw842_ptr2, sw842_ptr2}, - { sw842_ptr2, sw842_data2, sw842_ptr4, NULL}, - { sw842_ptr2, sw842_ptr2, sw842_data4, NULL}, - { sw842_ptr2, sw842_ptr2, sw842_data2, sw842_ptr2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_data2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr2, sw842_ptr2}, - { sw842_ptr2, sw842_ptr2, sw842_ptr4, NULL}, - { sw842_ptr4, sw842_data4, 
NULL}, /* 20 (10100) */ - { sw842_ptr4, sw842_data2, sw842_ptr2, NULL}, - { sw842_ptr4, sw842_ptr2, sw842_data2, NULL}, - { sw842_ptr4, sw842_ptr2, sw842_ptr2, NULL}, - { sw842_ptr4, sw842_ptr4, NULL}, - { sw842_ptr8, NULL} -}; - -/* Software decompress helpers */ - -static uint8_t sw842_get_byte(const char *buf, int bit) -{ - uint8_t tmpl; - uint16_t tmp; - tmp = htons(*(uint16_t *)(buf)); - tmp = (uint16_t)(tmp << bit); - tmp = ntohs(tmp); - memcpy(&tmpl, &tmp, 1); - return tmpl; -} - -static uint8_t sw842_get_template(const char **buf, int *bit) -{ - uint8_t byte; - byte = sw842_get_byte(*buf, *bit); - byte = byte >> 3; - byte &= 0x1F; - *buf += (*bit + 5) / 8; - *bit = (*bit + 5) % 8; - return byte; -} - -/* repeat_count happens to be 5-bit too (like the template) */ -static uint8_t sw842_get_repeat_count(const char **buf, int *bit) -{ - uint8_t byte; - byte = sw842_get_byte(*buf, *bit); - byte = byte >> 2; - byte &= 0x3F; - *buf += (*bit + 6) / 8; - *bit = (*bit + 6) % 8; - return byte; -} - -static uint8_t sw842_get_ptr2(const char **buf, int *bit) -{ - uint8_t ptr; - ptr = sw842_get_byte(*buf, *bit); - (*buf)++; - return ptr; -} - -static uint16_t sw842_get_ptr4(const char **buf, int *bit, - struct sw842_fifo *fifo) -{ - uint16_t ptr; - ptr = htons(*(uint16_t *)(*buf)); - ptr = (uint16_t)(ptr << *bit); - ptr = ptr >> 7; - ptr &= 0x01FF; - *buf += (*bit + 9) / 8; - *bit = (*bit + 9) % 8; - return ptr; -} - -static uint8_t sw842_get_ptr8(const char **buf, int *bit, - struct sw842_fifo *fifo) -{ - return sw842_get_ptr2(buf, bit); -} - -/* Software decompress template ops */ - -static int sw842_data8(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - int ret; - - ret = sw842_data4(inbuf, inbit, outbuf, fifo); - if (ret) - return ret; - ret = sw842_data4(inbuf, inbit, outbuf, fifo); - return ret; -} - -static int sw842_data4(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - int ret; - - 
ret = sw842_data2(inbuf, inbit, outbuf, fifo); - if (ret) - return ret; - ret = sw842_data2(inbuf, inbit, outbuf, fifo); - return ret; -} - -static int sw842_data2(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - **outbuf = sw842_get_byte(*inbuf, *inbit); - (*inbuf)++; - (*outbuf)++; - **outbuf = sw842_get_byte(*inbuf, *inbit); - (*inbuf)++; - (*outbuf)++; - return 0; -} - -static int sw842_ptr8(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint8_t ptr; - ptr = sw842_get_ptr8(inbuf, inbit, fifo); - if (!fifo->f84_full && (ptr >= fifo->f8_count)) - return 1; - memcpy(*outbuf, fifo->f8[ptr], 8); - *outbuf += 8; - return 0; -} - -static int sw842_ptr4(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint16_t ptr; - ptr = sw842_get_ptr4(inbuf, inbit, fifo); - if (!fifo->f84_full && (ptr >= fifo->f4_count)) - return 1; - memcpy(*outbuf, fifo->f4[ptr], 4); - *outbuf += 4; - return 0; -} - -static int sw842_ptr2(const char **inbuf, int *inbit, - unsigned char **outbuf, struct sw842_fifo *fifo) -{ - uint8_t ptr; - ptr = sw842_get_ptr2(inbuf, inbit); - if (!fifo->f2_full && (ptr >= fifo->f2_count)) - return 1; - memcpy(*outbuf, fifo->f2[ptr], 2); - *outbuf += 2; - return 0; -} - -static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo) -{ - unsigned char initial_f2count = fifo->f2_count; - - memcpy(fifo->f8[fifo->f8_count], buf, 8); - fifo->f4_count += 2; - fifo->f8_count += 1; - - if (!fifo->f84_full && fifo->f4_count >= 512) { - fifo->f84_full = 1; - fifo->f4_count /= 512; - } - - memcpy(fifo->f2[fifo->f2_count++], buf, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2); - memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2); - if (fifo->f2_count < initial_f2count) - fifo->f2_full = 1; -} - -static int sw842_decompress(const unsigned char *src, int srclen, - unsigned char *dst, int *destlen, - 
const void *wrkmem) -{ - uint8_t tmpl; - const char *inbuf; - int inbit = 0; - unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf; - const char *inbuf_end; - sw842_template_op op; - int opindex; - int i, repeat_count; - struct sw842_fifo *fifo; - int ret = 0; - - fifo = &((struct nx842_workmem *)(wrkmem))->swfifo; - memset(fifo, 0, sizeof(*fifo)); - - origbuf = NULL; - inbuf = src; - inbuf_end = src + srclen; - outbuf = dst; - outbuf_end = dst + *destlen; - - while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) { - if (inbuf >= inbuf_end) { - ret = -EINVAL; - goto out; - } - - opindex = 0; - prevbuf = origbuf; - origbuf = outbuf; - switch (tmpl) { - case SW842_TMPL_REPEAT: - if (prevbuf == NULL) { - ret = -EINVAL; - goto out; - } - - repeat_count = sw842_get_repeat_count(&inbuf, - &inbit) + 1; - - /* Did the repeat count advance past the end of input */ - if (inbuf > inbuf_end) { - ret = -EINVAL; - goto out; - } - - for (i = 0; i < repeat_count; i++) { - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - memcpy(outbuf, prevbuf, 8); - sw842_copy_to_fifo(outbuf, fifo); - outbuf += 8; - } - break; - - case SW842_TMPL_ZEROS: - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - memset(outbuf, 0, 8); - sw842_copy_to_fifo(outbuf, fifo); - outbuf += 8; - break; - - default: - if (tmpl > 25) { - ret = -EINVAL; - goto out; - } - - /* Does this go past the end of the input buffer */ - if ((inbuf + 2) > inbuf_end) { - ret = -EINVAL; - goto out; - } - - /* Would this overflow the output buffer */ - if ((outbuf + 8) > outbuf_end) { - ret = -ENOSPC; - goto out; - } - - while (opindex < 4 && - (op = sw842_tmpl_ops[tmpl][opindex++]) - != NULL) { - ret = (*op)(&inbuf, &inbit, &outbuf, fifo); - if (ret) { - ret = -EINVAL; - goto out; - } - sw842_copy_to_fifo(origbuf, fifo); - } - } - } - -out: - if (!ret) - *destlen = (unsigned 
int)(outbuf - dst); - else - *destlen = 0; - - return ret; -} diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h new file mode 100644 index 0000000..84b15b7 --- /dev/null +++ b/drivers/crypto/nx/nx-842.h @@ -0,0 +1,131 @@ + +#ifndef __NX_842_H__ +#define __NX_842_H__ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/nx842.h> +#include <linux/sw842.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/ratelimit.h> + +/* Restrictions on Data Descriptor List (DDL) and Entry (DDE) buffers + * + * From NX P8 workbook, sec 4.9.1 "842 details" + * Each DDE buffer is 128 byte aligned + * Each DDE buffer size is a multiple of 32 bytes (except the last) + * The last DDE buffer size is a multiple of 8 bytes + */ +#define DDE_BUFFER_ALIGN (128) +#define DDE_BUFFER_SIZE_MULT (32) +#define DDE_BUFFER_LAST_MULT (8) + +/* Arbitrary DDL length limit + * Allows max buffer size of MAX-1 to MAX pages + * (depending on alignment) + */ +#define DDL_LEN_MAX (17) + +/* CCW 842 CI/FC masks + * NX P8 workbook, section 4.3.1, figure 4-6 + * "CI/FC Boundary by NX CT type" + */ +#define CCW_CI_842 (0x00003ff8) +#define CCW_FC_842 (0x00000007) + +/* CCW Function Codes (FC) for 842 + * NX P8 workbook, section 4.9, table 4-28 + * "Function Code Definitions for 842 Memory Compression" + */ +#define CCW_FC_842_COMP_NOCRC (0) +#define CCW_FC_842_COMP_CRC (1) +#define CCW_FC_842_DECOMP_NOCRC (2) +#define CCW_FC_842_DECOMP_CRC (3) +#define CCW_FC_842_MOVE (4) + +/* CSB CC Error Types for 842 + * NX P8 workbook, section 4.10.3, table 4-30 + * "Reported Error Types Summary Table" + */ +/* These are all duplicates of existing codes defined in icswx.h. 
*/ +#define CSB_CC_TRANSLATION_DUP1 (80) +#define CSB_CC_TRANSLATION_DUP2 (82) +#define CSB_CC_TRANSLATION_DUP3 (84) +#define CSB_CC_TRANSLATION_DUP4 (86) +#define CSB_CC_TRANSLATION_DUP5 (92) +#define CSB_CC_TRANSLATION_DUP6 (94) +#define CSB_CC_PROTECTION_DUP1 (81) +#define CSB_CC_PROTECTION_DUP2 (83) +#define CSB_CC_PROTECTION_DUP3 (85) +#define CSB_CC_PROTECTION_DUP4 (87) +#define CSB_CC_PROTECTION_DUP5 (93) +#define CSB_CC_PROTECTION_DUP6 (95) +#define CSB_CC_RD_EXTERNAL_DUP1 (89) +#define CSB_CC_RD_EXTERNAL_DUP2 (90) +#define CSB_CC_RD_EXTERNAL_DUP3 (91) +/* These are specific to NX */ +/* 842 codes */ +#define CSB_CC_TPBC_GT_SPBC (64) /* no error, but >1 comp ratio */ +#define CSB_CC_CRC_MISMATCH (65) /* decomp crc mismatch */ +#define CSB_CC_TEMPL_INVALID (66) /* decomp invalid template value */ +#define CSB_CC_TEMPL_OVERFLOW (67) /* decomp template shows data after end */ +/* sym crypt codes */ +#define CSB_CC_DECRYPT_OVERFLOW (64) +/* asym crypt codes */ +#define CSB_CC_MINV_OVERFLOW (128) +/* These are reserved for hypervisor use */ +#define CSB_CC_HYP_RESERVE_START (240) +#define CSB_CC_HYP_RESERVE_END (253) +#define CSB_CC_HYP_NO_HW (254) +#define CSB_CC_HYP_HANG_ABORTED (255) + +/* CCB Completion Modes (CM) for 842 + * NX P8 workbook, section 4.3, figure 4-5 + * "CRB Details - Normal Cop_Req (CL=00, C=1)" + */ +#define CCB_CM_EXTRA_WRITE (CCB_CM0_ALL_COMPLETIONS & CCB_CM12_STORE) +#define CCB_CM_INTERRUPT (CCB_CM0_ALL_COMPLETIONS & CCB_CM12_INTERRUPT) + +#define LEN_ON_PAGE(pa) (PAGE_SIZE - ((pa) & ~PAGE_MASK)) + +static inline unsigned long nx842_get_pa(void *addr) +{ + if (!is_vmalloc_addr(addr)) + return __pa(addr); + + return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr); +} + +/* Get/Set bit fields */ +#define MASK_LSH(m) (__builtin_ffsl(m) - 1) +#define GET_FIELD(v, m) (((v) & (m)) >> MASK_LSH(m)) +#define SET_FIELD(v, m, val) (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m))) + +struct nx842_driver { + struct module *owner; + + struct 
nx842_constraints *constraints; + + int (*compress)(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem); + int (*decompress)(const unsigned char *in, unsigned int in_len, + unsigned char *out, unsigned int *out_len, + void *wrkmem); +}; + +void nx842_register_driver(struct nx842_driver *driver); +void nx842_unregister_driver(struct nx842_driver *driver); + + +/* To allow the main nx-compress module to load platform module */ +#define NX842_POWERNV_MODULE_NAME "nx-compress-powernv" +#define NX842_POWERNV_COMPAT_NAME "ibm,power-nx" +#define NX842_PSERIES_MODULE_NAME "nx-compress-pseries" +#define NX842_PSERIES_COMPAT_NAME "ibm,compression" + + +#endif /* __NX_842_H__ */ diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c index 88c5624..e4e64f6 100644 --- a/drivers/crypto/nx/nx-aes-gcm.c +++ b/drivers/crypto/nx/nx-aes-gcm.c @@ -96,9 +96,6 @@ out: static int gcm_aes_nx_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - if (authsize > crypto_aead_alg(tfm)->maxauthsize) - return -EINVAL; - crypto_aead_crt(tfm)->authsize = authsize; return 0; diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c index 23621da..4e91bdb 100644 --- a/drivers/crypto/nx/nx-sha256.c +++ b/drivers/crypto/nx/nx-sha256.c @@ -33,8 +33,9 @@ static int nx_sha256_init(struct shash_desc *desc) { struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -44,15 +45,18 @@ static int nx_sha256_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - 
&nx_ctx->op.outlen, - &len, - (u8 *) sctx->state, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc) - goto out; + if (len != SHA256_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be32(SHA256_H0); sctx->state[1] = __cpu_to_be32(SHA256_H1); @@ -64,7 +68,6 @@ static int nx_sha256_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be32(SHA256_H7); sctx->count = 0; -out: return 0; } @@ -74,10 +77,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process = 0, leftover, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -97,6 +102,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA256_BLOCK_SIZE data chunk to process in @@ -108,25 +119,22 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, + max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, 
- &data_len, - (u8 *) data, - NX_DS_SHA256); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); to_process = (data_len + buf_len); leftover = total - to_process; @@ -173,12 +181,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) struct sha256_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; unsigned long irq_flags; - int rc; + u32 max_sg_len; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * this is not an intermediate operation */ if (sctx->count >= SHA256_BLOCK_SIZE) { @@ -195,25 +210,24 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8); len = sctx->count & (SHA256_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *) sctx->buf, - NX_DS_SHA256); + in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf, + &len, max_sg_len); - if (rc || len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) + if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA256_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA256); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, max_sg_len); - if (rc || len != SHA256_DIGEST_SIZE) + if (len != SHA256_DIGEST_SIZE) { + rc = -EINVAL; goto out; + } + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct 
nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; goto out; diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c index b3adf10..e6a58d2 100644 --- a/drivers/crypto/nx/nx-sha512.c +++ b/drivers/crypto/nx/nx-sha512.c @@ -32,8 +32,9 @@ static int nx_sha512_init(struct shash_desc *desc) { struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); + struct nx_sg *out_sg; int len; - int rc; + u32 max_sg_len; nx_ctx_init(nx_ctx, HCOP_FC_SHA); @@ -43,15 +44,18 @@ static int nx_sha512_init(struct shash_desc *desc) NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - (u8 *)sctx->state, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state, + &len, max_sg_len); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); - if (rc || len != SHA512_DIGEST_SIZE) - goto out; + if (len != SHA512_DIGEST_SIZE) + return -EINVAL; sctx->state[0] = __cpu_to_be64(SHA512_H0); sctx->state[1] = __cpu_to_be64(SHA512_H1); @@ -63,7 +67,6 @@ static int nx_sha512_init(struct shash_desc *desc) sctx->state[7] = __cpu_to_be64(SHA512_H7); sctx->count[0] = 0; -out: return 0; } @@ -73,10 +76,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg; u64 to_process, leftover = 0, total; unsigned long irq_flags; int rc = 0; int data_len; + u32 max_sg_len; u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE); spin_lock_irqsave(&nx_ctx->lock, irq_flags); @@ -96,6 +101,12 @@ static int 
nx_sha512_update(struct shash_desc *desc, const u8 *data, NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE; NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION; + in_sg = nx_ctx->in_sg; + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + do { /* * to_process: the SHA512_BLOCK_SIZE data chunk to process in @@ -108,25 +119,26 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data, if (buf_len) { data_len = buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, + (u8 *) sctx->buf, + &data_len, max_sg_len); - if (rc || data_len != buf_len) + if (data_len != buf_len) { + rc = -EINVAL; goto out; + } } data_len = to_process - buf_len; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &data_len, - (u8 *) data, - NX_DS_SHA512); + in_sg = nx_build_sg_list(in_sg, (u8 *) data, + &data_len, max_sg_len); - if (rc || data_len != (to_process - buf_len)) + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + + if (data_len != (to_process - buf_len)) { + rc = -EINVAL; goto out; + } to_process = (data_len + buf_len); leftover = total - to_process; @@ -172,13 +184,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) struct sha512_state *sctx = shash_desc_ctx(desc); struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base); struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb; + struct nx_sg *in_sg, *out_sg; + u32 max_sg_len; u64 count0; unsigned long irq_flags; - int rc; + int rc = 0; int len; spin_lock_irqsave(&nx_ctx->lock, irq_flags); + max_sg_len = min_t(u64, nx_ctx->ap->sglen, + nx_driver.of.max_sg_len/sizeof(struct nx_sg)); + max_sg_len = min_t(u64, max_sg_len, + nx_ctx->ap->databytelen/NX_PAGE_SIZE); + /* final is represented by continuing the operation and indicating that * 
this is not an intermediate operation */ if (sctx->count[0] >= SHA512_BLOCK_SIZE) { @@ -200,24 +219,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out) csbcpb->cpb.sha512.message_bit_length_lo = count0; len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1); - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg, - &nx_ctx->op.inlen, - &len, - (u8 *)sctx->buf, - NX_DS_SHA512); + in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len, + max_sg_len); - if (rc || len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) + if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) { + rc = -EINVAL; goto out; + } len = SHA512_DIGEST_SIZE; - rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg, - &nx_ctx->op.outlen, - &len, - out, - NX_DS_SHA512); + out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, + max_sg_len); - if (rc) - goto out; + nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg); + nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg); if (!nx_ctx->op.outlen) { rc = -EINVAL; diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 1da6dc5..2e2529c 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -215,8 +215,15 @@ struct nx_sg *nx_walk_and_build(struct nx_sg *nx_dst, * @delta: is the amount we need to crop in order to bound the list. * */ -static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int delta) +static long int trim_sg_list(struct nx_sg *sg, + struct nx_sg *end, + unsigned int delta, + unsigned int *nbytes) { + long int oplen; + long int data_back; + unsigned int is_delta = delta; + while (delta && end > sg) { struct nx_sg *last = end - 1; @@ -228,54 +235,20 @@ static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int d delta -= last->len; } } - return (sg - end) * sizeof(struct nx_sg); -} - -/** - * nx_sha_build_sg_list - walk and build sg list to sha modes - * using right bounds and limits. 
- * @nx_ctx: NX crypto context for the lists we're building - * @nx_sg: current sg list in or out list - * @op_len: current op_len to be used in order to build a sg list - * @nbytes: number or bytes to be processed - * @offset: buf offset - * @mode: SHA256 or SHA512 - */ -int nx_sha_build_sg_list(struct nx_crypto_ctx *nx_ctx, - struct nx_sg *nx_in_outsg, - s64 *op_len, - unsigned int *nbytes, - u8 *offset, - u32 mode) -{ - unsigned int delta = 0; - unsigned int total = *nbytes; - struct nx_sg *nx_insg = nx_in_outsg; - unsigned int max_sg_len; - max_sg_len = min_t(u64, nx_ctx->ap->sglen, - nx_driver.of.max_sg_len/sizeof(struct nx_sg)); - max_sg_len = min_t(u64, max_sg_len, - nx_ctx->ap->databytelen/NX_PAGE_SIZE); - - *nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen); - nx_insg = nx_build_sg_list(nx_insg, offset, nbytes, max_sg_len); - - switch (mode) { - case NX_DS_SHA256: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA256_BLOCK_SIZE - 1)); - break; - case NX_DS_SHA512: - if (*nbytes < total) - delta = *nbytes - (*nbytes & ~(SHA512_BLOCK_SIZE - 1)); - break; - default: - return -EINVAL; + /* There are cases where we need to crop list in order to make it + * a block size multiple, but we also need to align data. 
In order to + * that we need to calculate how much we need to put back to be + * processed + */ + oplen = (sg - end) * sizeof(struct nx_sg); + if (is_delta) { + data_back = (abs(oplen) / AES_BLOCK_SIZE) * sg->len; + data_back = *nbytes - (data_back & ~(AES_BLOCK_SIZE - 1)); + *nbytes -= data_back; } - *op_len = trim_sg_list(nx_in_outsg, nx_insg, delta); - return 0; + return oplen; } /** @@ -330,8 +303,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx *nx_ctx, /* these lengths should be negative, which will indicate to phyp that * the input and output parameters are scatterlists, not linear * buffers */ - nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta); - nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta); + nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta, nbytes); + nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta, nbytes); return 0; } diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h index 6c9ecaa..41b87ee 100644 --- a/drivers/crypto/nx/nx.h +++ b/drivers/crypto/nx/nx.h @@ -153,8 +153,6 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm); void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function); int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op, u32 may_sleep); -int nx_sha_build_sg_list(struct nx_crypto_ctx *, struct nx_sg *, - s64 *, unsigned int *, u8 *, u32); struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32); int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *, struct scatterlist *, struct scatterlist *, unsigned int *, diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4d63e0d..b2024c95 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -362,7 +362,13 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req) static int omap_sham_hw_init(struct omap_sham_dev *dd) { - pm_runtime_get_sync(dd->dev); + int err; + + err = pm_runtime_get_sync(dd->dev); + if (err < 0) { + 
dev_err(dd->dev, "failed to get sync: %d\n", err); + return err; + } if (!test_bit(FLAGS_INIT, &dd->flags)) { set_bit(FLAGS_INIT, &dd->flags); @@ -1793,6 +1799,10 @@ static const struct of_device_id omap_sham_of_match[] = { .data = &omap_sham_pdata_omap2, }, { + .compatible = "ti,omap3-sham", + .data = &omap_sham_pdata_omap2, + }, + { .compatible = "ti,omap4-sham", .data = &omap_sham_pdata_omap4, }, @@ -1947,7 +1957,13 @@ static int omap_sham_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_irq_safe(dev); - pm_runtime_get_sync(dev); + + err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get sync: %d\n", err); + goto err_pm; + } + rev = omap_sham_read(dd, SHA_REG_REV(dd)); pm_runtime_put_sync(&pdev->dev); @@ -1977,6 +1993,7 @@ err_algs: for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) crypto_unregister_ahash( &dd->pdata->algs_info[i].algs_list[j]); +err_pm: pm_runtime_disable(dev); if (dd->dma_lch) dma_release_channel(dd->dma_lch); @@ -2019,7 +2036,11 @@ static int omap_sham_suspend(struct device *dev) static int omap_sham_resume(struct device *dev) { - pm_runtime_get_sync(dev); + int err = pm_runtime_get_sync(dev); + if (err < 0) { + dev_err(dev, "failed to get sync: %d\n", err); + return err; + } return 0; } #endif diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 5da5b98..eb2a0ca 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/aes.h> #include <crypto/algapi.h> #include <crypto/authenc.h> @@ -790,7 +790,8 @@ static int spacc_aead_cra_init(struct crypto_tfm *tfm) get_random_bytes(ctx->salt, sizeof(ctx->salt)); - tfm->crt_aead.reqsize = sizeof(struct spacc_req); + 
crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct spacc_req)); return 0; } diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index f22ce71..5fe9029 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -48,7 +48,6 @@ #define ADF_ACCEL_DEVICES_H_ #include <linux/module.h> #include <linux/list.h> -#include <linux/proc_fs.h> #include <linux/io.h> #include "adf_cfg_common.h" diff --git a/drivers/crypto/qat/qat_common/adf_cfg_user.h b/drivers/crypto/qat/qat_common/adf_cfg_user.h index 0c38a15..ef5988a 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_user.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_user.h @@ -54,14 +54,6 @@ struct adf_user_cfg_key_val { char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; union { - char *user_val_ptr; - uint64_t padding1; - }; - union { - struct adf_user_cfg_key_val *prev; - uint64_t padding2; - }; - union { struct adf_user_cfg_key_val *next; uint64_t padding3; }; @@ -75,10 +67,6 @@ struct adf_user_cfg_section { uint64_t padding1; }; union { - struct adf_user_cfg_section *prev; - uint64_t padding2; - }; - union { struct adf_user_cfg_section *next; uint64_t padding3; }; diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 0666ee6..27e16c0 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -53,6 +53,13 @@ #include "icp_qat_fw_loader_handle.h" #include "icp_qat_hal.h" +#define ADF_MAJOR_VERSION 0 +#define ADF_MINOR_VERSION 1 +#define ADF_BUILD_VERSION 3 +#define ADF_DRV_VERSION __stringify(ADF_MAJOR_VERSION) "." \ + __stringify(ADF_MINOR_VERSION) "." 
\ + __stringify(ADF_BUILD_VERSION) + #define ADF_STATUS_RESTARTING 0 #define ADF_STATUS_STARTING 1 #define ADF_STATUS_CONFIGURED 2 diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index cb5f066..e056b9e 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -504,3 +504,4 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); MODULE_ALIAS_CRYPTO("intel_qat"); +MODULE_VERSION(ADF_DRV_VERSION); diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1dc5b0a..dc231b8 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -47,7 +47,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/crypto.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/aes.h> #include <crypto/sha.h> #include <crypto/hash.h> @@ -1094,8 +1094,9 @@ static int qat_alg_aead_init(struct crypto_tfm *tfm, return -EFAULT; spin_lock_init(&ctx->lock); ctx->qat_hash_alg = hash; - tfm->crt_aead.reqsize = sizeof(struct aead_request) + - sizeof(struct qat_crypto_request); + crypto_aead_set_reqsize(__crypto_aead_cast(tfm), + sizeof(struct aead_request) + + sizeof(struct qat_crypto_request)); ctx->tfm = tfm; return 0; } diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index 9decea2..ecf0ef1 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -417,5 +417,6 @@ module_exit(adfdrv_release); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Intel"); -MODULE_FIRMWARE("qat_895xcc.bin"); +MODULE_FIRMWARE(ADF_DH895XCC_FW); MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); +MODULE_VERSION(ADF_DRV_VERSION); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414a..83aca95 100644 --- 
a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -46,7 +46,7 @@ #include <crypto/des.h> #include <crypto/sha.h> #include <crypto/md5.h> -#include <crypto/aead.h> +#include <crypto/internal/aead.h> #include <crypto/authenc.h> #include <crypto/skcipher.h> #include <crypto/hash.h> @@ -55,49 +55,92 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, + bool is_sec1) { - talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - talitos_ptr->eptr = upper_32_bits(dma_addr); + ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); + if (!is_sec1) + ptr->eptr = upper_32_bits(dma_addr); +} + +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len, + bool is_sec1) +{ + if (is_sec1) { + ptr->res = 0; + ptr->len1 = cpu_to_be16(len); + } else { + ptr->len = cpu_to_be16(len); + } +} + +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, + bool is_sec1) +{ + if (is_sec1) + return be16_to_cpu(ptr->len1); + else + return be16_to_cpu(ptr->len); +} + +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent = 0; } /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ static void map_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, - unsigned short len, void *data, - unsigned char extent, + struct talitos_ptr *ptr, + unsigned int len, void *data, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); - talitos_ptr->len = cpu_to_be16(len); - to_talitos_ptr(talitos_ptr, dma_addr); - talitos_ptr->j_extent = extent; + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr(ptr, dma_addr, is_sec1); + to_talitos_ptr_extent_clear(ptr, is_sec1); } /* * unmap bus single (contiguous) h/w descriptor pointer 
*/ static void unmap_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, + struct talitos_ptr *ptr, enum dma_data_direction dir) { - dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr), - be16_to_cpu(talitos_ptr->len), dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), + from_talitos_ptr_len(ptr, is_sec1), dir); } static int reset_channel(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; + bool is_sec1 = has_ftr_sec1(priv); - setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET); + if (is_sec1) { + setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, + TALITOS1_CCCR_LO_RESET); - while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET) - && --timeout) - cpu_relax(); + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) & + TALITOS1_CCCR_LO_RESET) && --timeout) + cpu_relax(); + } else { + setbits32(priv->chan[ch].reg + TALITOS_CCCR, + TALITOS2_CCCR_RESET); + + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & + TALITOS2_CCCR_RESET) && --timeout) + cpu_relax(); + } if (timeout == 0) { dev_err(dev, "failed to reset channel %d\n", ch); @@ -120,11 +163,12 @@ static int reset_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - u32 mcr = TALITOS_MCR_SWR; + bool is_sec1 = has_ftr_sec1(priv); + u32 mcr = is_sec1 ? 
TALITOS1_MCR_SWR : TALITOS2_MCR_SWR; setbits32(priv->reg + TALITOS_MCR, mcr); - while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR) + while ((in_be32(priv->reg + TALITOS_MCR) & mcr) && --timeout) cpu_relax(); @@ -148,6 +192,7 @@ static int init_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); int ch, err; + bool is_sec1 = has_ftr_sec1(priv); /* * Master reset @@ -171,12 +216,19 @@ static int init_device(struct device *dev) } /* enable channel done and error interrupts */ - setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT); - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); + if (is_sec1) { + clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT); + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); + /* disable parity error check in DEU (erroneous? test vect.) */ + setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE); + } else { + setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT); + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); + } /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) - setbits32(priv->reg + TALITOS_MDEUICR_LO, + setbits32(priv->reg_mdeu + TALITOS_EUICR_LO, TALITOS_MDEUICR_LO_ICE); return 0; @@ -204,6 +256,7 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, struct talitos_request *request; unsigned long flags; int head; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].head_lock, flags); @@ -217,8 +270,17 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, request = &priv->chan[ch].fifo[head]; /* map descriptor and save caller data */ - request->dma_desc = dma_map_single(dev, desc, sizeof(*desc), - DMA_BIDIRECTIONAL); + if (is_sec1) { + desc->hdr1 = desc->hdr; + desc->next_desc = 0; + request->dma_desc = dma_map_single(dev, &desc->hdr1, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } else { + request->dma_desc = 
dma_map_single(dev, desc, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } request->callback = callback; request->context = context; @@ -250,16 +312,21 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) struct talitos_request *request, saved_req; unsigned long flags; int tail, status; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].tail_lock, flags); tail = priv->chan[ch].tail; while (priv->chan[ch].fifo[tail].desc) { + __be32 hdr; + request = &priv->chan[ch].fifo[tail]; /* descriptors with their done bits set don't get the error */ rmb(); - if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE) + hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr; + + if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE) status = 0; else if (!error) @@ -268,7 +335,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, - sizeof(struct talitos_desc), + TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ @@ -302,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) /* * process completed requests for channels that have done status */ -#define DEF_TALITOS_DONE(name, ch_done_mask) \ -static void talitos_done_##name(unsigned long data) \ +#define DEF_TALITOS1_DONE(name, ch_done_mask) \ +static void talitos1_done_##name(unsigned long data) \ +{ \ + struct device *dev = (struct device *)data; \ + struct talitos_private *priv = dev_get_drvdata(dev); \ + unsigned long flags; \ + \ + if (ch_done_mask & 0x10000000) \ + flush_channel(dev, 0, 0, 0); \ + if (priv->num_channels == 1) \ + goto out; \ + if (ch_done_mask & 0x40000000) \ + flush_channel(dev, 1, 0, 0); \ + if (ch_done_mask & 0x00010000) \ + flush_channel(dev, 2, 0, 0); \ + if (ch_done_mask & 0x00040000) \ + flush_channel(dev, 3, 0, 0); \ + \ +out: \ + /* At this point, all completed channels have been processed */ \ + 
/* Unmask done interrupts for channels completed later on. */ \ + spin_lock_irqsave(&priv->reg_lock, flags); \ + clrbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ +} + +DEF_TALITOS1_DONE(4ch, TALITOS1_ISR_4CHDONE) + +#define DEF_TALITOS2_DONE(name, ch_done_mask) \ +static void talitos2_done_##name(unsigned long data) \ { \ struct device *dev = (struct device *)data; \ struct talitos_private *priv = dev_get_drvdata(dev); \ @@ -325,12 +421,13 @@ out: \ /* Unmask done interrupts for channels completed later on. */ \ spin_lock_irqsave(&priv->reg_lock, flags); \ setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); \ + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); \ spin_unlock_irqrestore(&priv->reg_lock, flags); \ } -DEF_TALITOS_DONE(4ch, TALITOS_ISR_4CHDONE) -DEF_TALITOS_DONE(ch0_2, TALITOS_ISR_CH_0_2_DONE) -DEF_TALITOS_DONE(ch1_3, TALITOS_ISR_CH_1_3_DONE) + +DEF_TALITOS2_DONE(4ch, TALITOS2_ISR_4CHDONE) +DEF_TALITOS2_DONE(ch0_2, TALITOS2_ISR_CH_0_2_DONE) +DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE) /* * locate current (offending) descriptor @@ -377,44 +474,44 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: dev_err(dev, "AFEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AFEUISR), - in_be32(priv->reg + TALITOS_AFEUISR_LO)); + in_be32(priv->reg_afeu + TALITOS_EUISR), + in_be32(priv->reg_afeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_DEU: dev_err(dev, "DEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_DEUISR), - in_be32(priv->reg + TALITOS_DEUISR_LO)); + in_be32(priv->reg_deu + TALITOS_EUISR), + in_be32(priv->reg_deu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_MDEUA: case DESC_HDR_SEL0_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), 
- in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_RNG: dev_err(dev, "RNGUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_RNGUISR), - in_be32(priv->reg + TALITOS_RNGUISR_LO)); + in_be32(priv->reg_rngu + TALITOS_ISR), + in_be32(priv->reg_rngu + TALITOS_ISR_LO)); break; case DESC_HDR_SEL0_PKEU: dev_err(dev, "PKEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_PKEUISR), - in_be32(priv->reg + TALITOS_PKEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_AESU: dev_err(dev, "AESUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AESUISR), - in_be32(priv->reg + TALITOS_AESUISR_LO)); + in_be32(priv->reg_aesu + TALITOS_EUISR), + in_be32(priv->reg_aesu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_KEU: dev_err(dev, "KEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_KEUISR), - in_be32(priv->reg + TALITOS_KEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; } @@ -422,13 +519,13 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) case DESC_HDR_SEL1_MDEUA: case DESC_HDR_SEL1_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL1_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; } @@ -445,17 +542,24 @@ static void 
talitos_error(struct device *dev, u32 isr, u32 isr_lo) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - int ch, error, reset_dev = 0, reset_ch = 0; - u32 v, v_lo; + int ch, error, reset_dev = 0; + u32 v_lo; + bool is_sec1 = has_ftr_sec1(priv); + int reset_ch = is_sec1 ? 1 : 0; /* only SEC2 supports continuation */ for (ch = 0; ch < priv->num_channels; ch++) { /* skip channels without errors */ - if (!(isr & (1 << (ch * 2 + 1)))) - continue; + if (is_sec1) { + /* bits 29, 31, 17, 19 */ + if (!(isr & (1 << (29 + (ch & 1) * 2 - (ch & 2) * 6)))) + continue; + } else { + if (!(isr & (1 << (ch * 2 + 1)))) + continue; + } error = -EINVAL; - v = in_be32(priv->chan[ch].reg + TALITOS_CCPSR); v_lo = in_be32(priv->chan[ch].reg + TALITOS_CCPSR_LO); if (v_lo & TALITOS_CCPSR_LO_DOF) { @@ -471,23 +575,28 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) if (v_lo & TALITOS_CCPSR_LO_MDTE) dev_err(dev, "master data transfer error\n"); if (v_lo & TALITOS_CCPSR_LO_SGDLZ) - dev_err(dev, "s/g data length zero error\n"); + dev_err(dev, is_sec1 ? "pointeur not complete error\n" + : "s/g data length zero error\n"); if (v_lo & TALITOS_CCPSR_LO_FPZ) - dev_err(dev, "fetch pointer zero error\n"); + dev_err(dev, is_sec1 ? "parity error\n" + : "fetch pointer zero error\n"); if (v_lo & TALITOS_CCPSR_LO_IDH) dev_err(dev, "illegal descriptor header error\n"); if (v_lo & TALITOS_CCPSR_LO_IEU) - dev_err(dev, "invalid execution unit error\n"); + dev_err(dev, is_sec1 ? 
"static assignment error\n" + : "invalid exec unit error\n"); if (v_lo & TALITOS_CCPSR_LO_EU) report_eu_error(dev, ch, current_desc_hdr(dev, ch)); - if (v_lo & TALITOS_CCPSR_LO_GB) - dev_err(dev, "gather boundary error\n"); - if (v_lo & TALITOS_CCPSR_LO_GRL) - dev_err(dev, "gather return/length error\n"); - if (v_lo & TALITOS_CCPSR_LO_SB) - dev_err(dev, "scatter boundary error\n"); - if (v_lo & TALITOS_CCPSR_LO_SRL) - dev_err(dev, "scatter return/length error\n"); + if (!is_sec1) { + if (v_lo & TALITOS_CCPSR_LO_GB) + dev_err(dev, "gather boundary error\n"); + if (v_lo & TALITOS_CCPSR_LO_GRL) + dev_err(dev, "gather return/length error\n"); + if (v_lo & TALITOS_CCPSR_LO_SB) + dev_err(dev, "scatter boundary error\n"); + if (v_lo & TALITOS_CCPSR_LO_SRL) + dev_err(dev, "scatter return/length error\n"); + } flush_channel(dev, ch, error, reset_ch); @@ -495,10 +604,10 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) reset_channel(dev, ch); } else { setbits32(priv->chan[ch].reg + TALITOS_CCCR, - TALITOS_CCCR_CONT); + TALITOS2_CCCR_CONT); setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, 0); while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & - TALITOS_CCCR_CONT) && --timeout) + TALITOS2_CCCR_CONT) && --timeout) cpu_relax(); if (timeout == 0) { dev_err(dev, "failed to restart channel %d\n", @@ -507,9 +616,14 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) } } } - if (reset_dev || isr & ~TALITOS_ISR_4CHERR || isr_lo) { - dev_err(dev, "done overflow, internal time out, or rngu error: " - "ISR 0x%08x_%08x\n", isr, isr_lo); + if (reset_dev || (is_sec1 && isr & ~TALITOS1_ISR_4CHERR) || + (!is_sec1 && isr & ~TALITOS2_ISR_4CHERR) || isr_lo) { + if (is_sec1 && (isr_lo & TALITOS1_ISR_TEA_ERR)) + dev_err(dev, "TEA error: ISR 0x%08x_%08x\n", + isr, isr_lo); + else + dev_err(dev, "done overflow, internal time out, or " + "rngu error: ISR 0x%08x_%08x\n", isr, isr_lo); /* purge request queues */ for (ch = 0; ch < priv->num_channels; ch++) @@ 
-520,8 +634,43 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo) } } -#define DEF_TALITOS_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ -static irqreturn_t talitos_interrupt_##name(int irq, void *data) \ +#define DEF_TALITOS1_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ +static irqreturn_t talitos1_interrupt_##name(int irq, void *data) \ +{ \ + struct device *dev = data; \ + struct talitos_private *priv = dev_get_drvdata(dev); \ + u32 isr, isr_lo; \ + unsigned long flags; \ + \ + spin_lock_irqsave(&priv->reg_lock, flags); \ + isr = in_be32(priv->reg + TALITOS_ISR); \ + isr_lo = in_be32(priv->reg + TALITOS_ISR_LO); \ + /* Acknowledge interrupt */ \ + out_be32(priv->reg + TALITOS_ICR, isr & (ch_done_mask | ch_err_mask)); \ + out_be32(priv->reg + TALITOS_ICR_LO, isr_lo); \ + \ + if (unlikely(isr & ch_err_mask || isr_lo & TALITOS1_IMR_LO_INIT)) { \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ + talitos_error(dev, isr & ch_err_mask, isr_lo); \ + } \ + else { \ + if (likely(isr & ch_done_mask)) { \ + /* mask further done interrupts. */ \ + setbits32(priv->reg + TALITOS_IMR, ch_done_mask); \ + /* done_task will unmask done interrupts at exit */ \ + tasklet_schedule(&priv->done_task[tlet]); \ + } \ + spin_unlock_irqrestore(&priv->reg_lock, flags); \ + } \ + \ + return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED : \ + IRQ_NONE; \ +} + +DEF_TALITOS1_INTERRUPT(4ch, TALITOS1_ISR_4CHDONE, TALITOS1_ISR_4CHERR, 0) + +#define DEF_TALITOS2_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet) \ +static irqreturn_t talitos2_interrupt_##name(int irq, void *data) \ { \ struct device *dev = data; \ struct talitos_private *priv = dev_get_drvdata(dev); \ @@ -552,9 +701,12 @@ static irqreturn_t talitos_interrupt_##name(int irq, void *data) \ return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? 
IRQ_HANDLED : \ IRQ_NONE; \ } -DEF_TALITOS_INTERRUPT(4ch, TALITOS_ISR_4CHDONE, TALITOS_ISR_4CHERR, 0) -DEF_TALITOS_INTERRUPT(ch0_2, TALITOS_ISR_CH_0_2_DONE, TALITOS_ISR_CH_0_2_ERR, 0) -DEF_TALITOS_INTERRUPT(ch1_3, TALITOS_ISR_CH_1_3_DONE, TALITOS_ISR_CH_1_3_ERR, 1) + +DEF_TALITOS2_INTERRUPT(4ch, TALITOS2_ISR_4CHDONE, TALITOS2_ISR_4CHERR, 0) +DEF_TALITOS2_INTERRUPT(ch0_2, TALITOS2_ISR_CH_0_2_DONE, TALITOS2_ISR_CH_0_2_ERR, + 0) +DEF_TALITOS2_INTERRUPT(ch1_3, TALITOS2_ISR_CH_1_3_DONE, TALITOS2_ISR_CH_1_3_ERR, + 1) /* * hwrng @@ -567,7 +719,7 @@ static int talitos_rng_data_present(struct hwrng *rng, int wait) int i; for (i = 0; i < 20; i++) { - ofl = in_be32(priv->reg + TALITOS_RNGUSR_LO) & + ofl = in_be32(priv->reg_rngu + TALITOS_EUSR_LO) & TALITOS_RNGUSR_LO_OFL; if (ofl || !wait) break; @@ -583,8 +735,8 @@ static int talitos_rng_data_read(struct hwrng *rng, u32 *data) struct talitos_private *priv = dev_get_drvdata(dev); /* rng fifo requires 64-bit accesses */ - *data = in_be32(priv->reg + TALITOS_RNGU_FIFO); - *data = in_be32(priv->reg + TALITOS_RNGU_FIFO_LO); + *data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO); + *data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO_LO); return sizeof(u32); } @@ -595,8 +747,9 @@ static int talitos_rng_init(struct hwrng *rng) struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - setbits32(priv->reg + TALITOS_RNGURCR_LO, TALITOS_RNGURCR_LO_SR); - while (!(in_be32(priv->reg + TALITOS_RNGUSR_LO) & TALITOS_RNGUSR_LO_RD) + setbits32(priv->reg_rngu + TALITOS_EURCR_LO, TALITOS_RNGURCR_LO_SR); + while (!(in_be32(priv->reg_rngu + TALITOS_EUSR_LO) + & TALITOS_RNGUSR_LO_RD) && --timeout) cpu_relax(); if (timeout == 0) { @@ -605,7 +758,7 @@ static int talitos_rng_init(struct hwrng *rng) } /* start generating */ - setbits32(priv->reg + TALITOS_RNGUDSR_LO, 0); + setbits32(priv->reg_rngu + TALITOS_EUDSR_LO, 0); return 0; } @@ -661,7 +814,7 @@ struct talitos_ahash_req_ctx { unsigned int first; unsigned int last; 
unsigned int to_hash_later; - u64 nbuf; + unsigned int nbuf; struct scatterlist bufsl[2]; struct scatterlist *psrc; }; @@ -712,9 +865,10 @@ badkey: * @dst_chained: whether dst is chained or not * @iv_dma: dma address of iv for checking continuity and link table * @dma_len: length of dma mapped link_tbl space - * @dma_link_tbl: bus physical address of link_tbl + * @dma_link_tbl: bus physical address of link_tbl/buf * @desc: h/w descriptor - * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) + * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2) + * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1) * * if decrypting (with authcheck), or either one of src_nents or dst_nents * is greater than 1, an integrity check value is concatenated to the end @@ -731,7 +885,10 @@ struct talitos_edesc { int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; - struct talitos_ptr link_tbl[0]; + union { + struct talitos_ptr link_tbl[0]; + u8 buf[0]; + }; }; static int talitos_map_sg(struct device *dev, struct scatterlist *sg, @@ -907,8 +1064,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, { int n_sg = sg_count; - while (n_sg--) { - to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg)); + while (sg && n_sg--) { + to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0); link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg)); link_tbl_ptr->j_extent = 0; link_tbl_ptr++; @@ -925,7 +1082,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, sg_count--; link_tbl_ptr--; } - be16_add_cpu(&link_tbl_ptr->len, cryptlen); + link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len) + + cryptlen); /* tag end of link table */ link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -953,7 +1111,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, - 0, DMA_TO_DEVICE); + DMA_TO_DEVICE); /* hmac data */ 
desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize); @@ -962,7 +1120,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; /* assoc_nents - 1 entries for assoc, 1 for IV */ @@ -973,7 +1131,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_ptr += sg_count - 1; tbl_ptr->j_extent = 0; tbl_ptr++; - to_talitos_ptr(tbl_ptr, edesc->iv_dma); + to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0); tbl_ptr->len = cpu_to_be16(ivsize); tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -982,14 +1140,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } else { if (areq->assoclen) to_talitos_ptr(&desc->ptr[1], - sg_dma_address(areq->assoc)); + sg_dma_address(areq->assoc), 0); else - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); + to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, 0); desc->ptr[1].j_extent = 0; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma); + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); desc->ptr[2].len = cpu_to_be16(ivsize); desc->ptr[2].j_extent = 0; /* Sync needed for the aead_givencrypt case */ @@ -997,7 +1155,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, 0, + (char *)&ctx->key + ctx->authkeylen, DMA_TO_DEVICE); /* @@ -1015,7 +1173,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, edesc->src_chained); if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src)); + to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0); } else { sg_link_tbl_len = cryptlen; @@ -1026,14 +1184,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, 
struct aead_request *areq, &edesc->link_tbl[0]); if (sg_count > 1) { desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl); + to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl, 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); } else { /* Only one segment now, so no link tbl needed */ to_talitos_ptr(&desc->ptr[4], - sg_dma_address(areq->src)); + sg_dma_address(areq->src), 0); } } @@ -1047,13 +1205,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, DMA_FROM_DEVICE, edesc->dst_chained); if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst)); + to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0); } else { int tbl_off = edesc->src_nents + 1; struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr)); + tbl_off * sizeof(struct talitos_ptr), 0); sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, tbl_ptr); @@ -1068,14 +1226,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + (tbl_off + edesc->dst_nents + 1 + edesc->assoc_nents) * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, DMA_FROM_DEVICE); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); @@ -1095,7 +1253,7 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, bool *chained) int sg_nents = 0; *chained = false; - while (nbytes > 0) { + while (nbytes > 0 && sg) { sg_nents++; nbytes -= sg->length; if (!sg_is_last(sg) && (sg + 1)->length == 0) @@ -1128,8 +1286,11 @@ static struct 
talitos_edesc *talitos_edesc_alloc(struct device *dev, dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { + if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } @@ -1173,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, */ alloc_len = sizeof(struct talitos_edesc); if (assoc_nents || src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 2 + assoc_nents) * - sizeof(struct talitos_ptr) + authsize; + if (is_sec1) + dma_len = (src_nents ? cryptlen : 0) + + (dst_nents ? cryptlen : 0); + else + dma_len = (src_nents + dst_nents + 2 + assoc_nents) * + sizeof(struct talitos_ptr) + authsize; alloc_len += dma_len; } else { dma_len = 0; @@ -1327,16 +1492,43 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, + struct scatterlist *dst, unsigned int len, + struct talitos_edesc *edesc) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, + dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? 
: 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } +} + static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1358,6 +1550,102 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + sg_count = edesc->src_nents ? : 1; + + if (sg_count == 1) { + dma_map_sg(dev, src, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_copy_to_buffer(src, sg_count, edesc->buf, len); + to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + len, DMA_TO_DEVICE); + } + } else { + to_talitos_ptr_extent_clear(ptr, is_sec1); + + sg_count = talitos_map_sg(dev, src, edesc->src_nents ? 
: 1, dir, + edesc->src_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + dma_sync_single_for_device(dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed*/ + to_talitos_ptr(ptr, sg_dma_address(src), + is_sec1); + } + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (dir != DMA_NONE) + sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, + dir, edesc->dst_chained); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + if (sg_count == 1) { + if (dir != DMA_NONE) + dma_map_sg(dev, dst, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + } + } else { + to_talitos_ptr_extent_clear(ptr, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr), 0); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } + } +} + static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1371,83 +1659,41 @@ 
static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* first DWORD empty */ - desc->ptr[0].len = 0; - to_talitos_ptr(&desc->ptr[0], 0); - desc->ptr[0].j_extent = 0; + desc->ptr[0] = zero_entry; /* cipher iv */ - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); - desc->ptr[1].len = cpu_to_be16(ivsize); - desc->ptr[1].j_extent = 0; + to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); + to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); /* * cipher in */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[3], sg_dma_address(areq->src)); - } else { - sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - to_talitos_ptr(&desc->ptr[3], - sg_dma_address(areq->src)); - } - } + sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE, + &desc->ptr[3]); /* cipher out */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = 0; - - if (areq->src != areq->dst) - sg_count = talitos_map_sg(dev, areq->dst, - edesc->dst_nents ? 
: 1, - DMA_FROM_DEVICE, edesc->dst_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->dst)); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr)); - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; - sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen, - link_tbl_ptr); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); - } + map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, + (areq->src == areq->dst) ? DMA_NONE + : DMA_FROM_DEVICE, + &desc->ptr[4], sg_count); /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, DMA_FROM_DEVICE); /* last DWORD empty */ - desc->ptr[6].len = 0; - to_talitos_ptr(&desc->ptr[6], 0); - desc->ptr[6].j_extent = 0; + desc->ptr[6] = zero_entry; ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { @@ -1507,20 +1753,22 @@ static void common_nonsnoop_hash_unmap(struct device *dev, struct ahash_request *areq) { struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + /* When using hashctx-in, must unmap it. 
*/ - if (edesc->desc.ptr[1].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - if (edesc->desc.ptr[2].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[2], is_sec1)) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1548,6 +1796,27 @@ static void ahash_done(struct device *dev, areq->base.complete(&areq->base, err); } +/* + * SEC1 doesn't like hashing of 0 sized message, so we do the padding + * ourself and submit a padded block + */ +void talitos_handle_buggy_hash(struct talitos_ctx *ctx, + struct talitos_edesc *edesc, + struct talitos_ptr *ptr) +{ + static u8 padded_hash[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + pr_err_once("Bug in SEC1, padding ourself\n"); + edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD; + map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash), + (char *)padded_hash, DMA_TO_DEVICE); +} + static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct ahash_request *areq, unsigned int length, void (*callback) (struct device *dev, @@ -1559,7 +1828,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; - int sg_count, ret; + int ret; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1568,7 +1839,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (!req_ctx->first || req_ctx->swinit) { map_single_talitos_ptr(dev, &desc->ptr[1], req_ctx->hw_context_size, - (char 
*)req_ctx->hw_context, 0, + (char *)req_ctx->hw_context, DMA_TO_DEVICE); req_ctx->swinit = 0; } else { @@ -1580,38 +1851,15 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* HMAC key */ if (ctx->keylen) map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); else desc->ptr[2] = zero_entry; /* * data in */ - desc->ptr[3].len = cpu_to_be16(length); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, req_ctx->psrc, - edesc->src_nents ? : 1, - DMA_TO_DEVICE, edesc->src_chained); - - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc)); - } else { - sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length, - &edesc->link_tbl[0]); - if (sg_count > 1) { - desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP; - to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl); - dma_sync_single_for_device(ctx->dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed */ - to_talitos_ptr(&desc->ptr[3], - sg_dma_address(req_ctx->psrc)); - } - } + map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, + DMA_TO_DEVICE, &desc->ptr[3]); /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1620,15 +1868,18 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (req_ctx->last) map_single_talitos_ptr(dev, &desc->ptr[5], crypto_ahash_digestsize(tfm), - areq->result, 0, DMA_FROM_DEVICE); + areq->result, DMA_FROM_DEVICE); else map_single_talitos_ptr(dev, &desc->ptr[5], req_ctx->hw_context_size, - req_ctx->hw_context, 0, DMA_FROM_DEVICE); + req_ctx->hw_context, DMA_FROM_DEVICE); /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) + talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); @@ -2561,6 
+2812,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, break; default: dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type); + kfree(t_alg); return ERR_PTR(-EINVAL); } @@ -2581,29 +2833,35 @@ static int talitos_probe_irq(struct platform_device *ofdev) struct device_node *np = ofdev->dev.of_node; struct talitos_private *priv = dev_get_drvdata(dev); int err; + bool is_sec1 = has_ftr_sec1(priv); priv->irq[0] = irq_of_parse_and_map(np, 0); if (!priv->irq[0]) { dev_err(dev, "failed to map irq\n"); return -EINVAL; } + if (is_sec1) { + err = request_irq(priv->irq[0], talitos1_interrupt_4ch, 0, + dev_driver_string(dev), dev); + goto primary_out; + } priv->irq[1] = irq_of_parse_and_map(np, 1); /* get the primary irq line */ if (!priv->irq[1]) { - err = request_irq(priv->irq[0], talitos_interrupt_4ch, 0, + err = request_irq(priv->irq[0], talitos2_interrupt_4ch, 0, dev_driver_string(dev), dev); goto primary_out; } - err = request_irq(priv->irq[0], talitos_interrupt_ch0_2, 0, + err = request_irq(priv->irq[0], talitos2_interrupt_ch0_2, 0, dev_driver_string(dev), dev); if (err) goto primary_out; /* get the secondary irq line */ - err = request_irq(priv->irq[1], talitos_interrupt_ch1_3, 0, + err = request_irq(priv->irq[1], talitos2_interrupt_ch1_3, 0, dev_driver_string(dev), dev); if (err) { dev_err(dev, "failed to request secondary irq\n"); @@ -2630,6 +2888,7 @@ static int talitos_probe(struct platform_device *ofdev) struct talitos_private *priv; const unsigned int *prop; int i, err; + int stride; priv = kzalloc(sizeof(struct talitos_private), GFP_KERNEL); if (!priv) @@ -2643,20 +2902,6 @@ static int talitos_probe(struct platform_device *ofdev) spin_lock_init(&priv->reg_lock); - err = talitos_probe_irq(ofdev); - if (err) - goto err_out; - - if (!priv->irq[1]) { - tasklet_init(&priv->done_task[0], talitos_done_4ch, - (unsigned long)dev); - } else { - tasklet_init(&priv->done_task[0], talitos_done_ch0_2, - (unsigned long)dev); - 
tasklet_init(&priv->done_task[1], talitos_done_ch1_3, - (unsigned long)dev); - } - priv->reg = of_iomap(np, 0); if (!priv->reg) { dev_err(dev, "failed to of_iomap\n"); @@ -2696,6 +2941,53 @@ static int talitos_probe(struct platform_device *ofdev) TALITOS_FTR_SHA224_HWINIT | TALITOS_FTR_HMAC_OK; + if (of_device_is_compatible(np, "fsl,sec1.0")) + priv->features |= TALITOS_FTR_SEC1; + + if (of_device_is_compatible(np, "fsl,sec1.2")) { + priv->reg_deu = priv->reg + TALITOS12_DEU; + priv->reg_aesu = priv->reg + TALITOS12_AESU; + priv->reg_mdeu = priv->reg + TALITOS12_MDEU; + stride = TALITOS1_CH_STRIDE; + } else if (of_device_is_compatible(np, "fsl,sec1.0")) { + priv->reg_deu = priv->reg + TALITOS10_DEU; + priv->reg_aesu = priv->reg + TALITOS10_AESU; + priv->reg_mdeu = priv->reg + TALITOS10_MDEU; + priv->reg_afeu = priv->reg + TALITOS10_AFEU; + priv->reg_rngu = priv->reg + TALITOS10_RNGU; + priv->reg_pkeu = priv->reg + TALITOS10_PKEU; + stride = TALITOS1_CH_STRIDE; + } else { + priv->reg_deu = priv->reg + TALITOS2_DEU; + priv->reg_aesu = priv->reg + TALITOS2_AESU; + priv->reg_mdeu = priv->reg + TALITOS2_MDEU; + priv->reg_afeu = priv->reg + TALITOS2_AFEU; + priv->reg_rngu = priv->reg + TALITOS2_RNGU; + priv->reg_pkeu = priv->reg + TALITOS2_PKEU; + priv->reg_keu = priv->reg + TALITOS2_KEU; + priv->reg_crcu = priv->reg + TALITOS2_CRCU; + stride = TALITOS2_CH_STRIDE; + } + + err = talitos_probe_irq(ofdev); + if (err) + goto err_out; + + if (of_device_is_compatible(np, "fsl,sec1.0")) { + tasklet_init(&priv->done_task[0], talitos1_done_4ch, + (unsigned long)dev); + } else { + if (!priv->irq[1]) { + tasklet_init(&priv->done_task[0], talitos2_done_4ch, + (unsigned long)dev); + } else { + tasklet_init(&priv->done_task[0], talitos2_done_ch0_2, + (unsigned long)dev); + tasklet_init(&priv->done_task[1], talitos2_done_ch1_3, + (unsigned long)dev); + } + } + priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); if (!priv->chan) { @@ -2707,7 +2999,7 @@ 
static int talitos_probe(struct platform_device *ofdev) priv->fifo_len = roundup_pow_of_two(priv->chfifo_len); for (i = 0; i < priv->num_channels; i++) { - priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1); + priv->chan[i].reg = priv->reg + stride * (i + 1); if (!priv->irq[1] || !(i & 1)) priv->chan[i].reg += TALITOS_CH_BASE_OFFSET; @@ -2794,9 +3086,16 @@ err_out: } static const struct of_device_id talitos_match[] = { +#ifdef CONFIG_CRYPTO_DEV_TALITOS1 + { + .compatible = "fsl,sec1.0", + }, +#endif +#ifdef CONFIG_CRYPTO_DEV_TALITOS2 { .compatible = "fsl,sec2.0", }, +#endif {}, }; MODULE_DEVICE_TABLE(of, talitos_match); diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 61a1405..314daf5 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -29,7 +29,8 @@ */ #define TALITOS_TIMEOUT 100000 -#define TALITOS_MAX_DATA_LEN 65535 +#define TALITOS1_MAX_DATA_LEN 32768 +#define TALITOS2_MAX_DATA_LEN 65535 #define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f) #define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf) @@ -37,9 +38,17 @@ /* descriptor pointer entry */ struct talitos_ptr { - __be16 len; /* length */ - u8 j_extent; /* jump to sg link table and/or extent */ - u8 eptr; /* extended address */ + union { + struct { /* SEC2 format */ + __be16 len; /* length */ + u8 j_extent; /* jump to sg link table and/or extent*/ + u8 eptr; /* extended address */ + }; + struct { /* SEC1 format */ + __be16 res; + __be16 len1; /* length */ + }; + }; __be32 ptr; /* address */ }; @@ -53,10 +62,16 @@ static const struct talitos_ptr zero_entry = { /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ - __be32 hdr_lo; /* header low bits */ + union { + __be32 hdr_lo; /* header low bits */ + __be32 hdr1; /* header for SEC1 */ + }; struct talitos_ptr ptr[7]; /* ptr/len pair array */ + __be32 next_desc; /* next descriptor (SEC1) */ }; +#define TALITOS_DESC_SIZE (sizeof(struct talitos_desc) - sizeof(__be32)) + 
/** * talitos_request - descriptor submission request * @desc: descriptor pointer (kernel virtual) @@ -97,6 +112,14 @@ struct talitos_private { struct device *dev; struct platform_device *ofdev; void __iomem *reg; + void __iomem *reg_deu; + void __iomem *reg_aesu; + void __iomem *reg_mdeu; + void __iomem *reg_afeu; + void __iomem *reg_rngu; + void __iomem *reg_pkeu; + void __iomem *reg_keu; + void __iomem *reg_crcu; int irq[2]; /* SEC global registers lock */ @@ -144,49 +167,80 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002 #define TALITOS_FTR_SHA224_HWINIT 0x00000004 #define TALITOS_FTR_HMAC_OK 0x00000008 +#define TALITOS_FTR_SEC1 0x00000010 + +/* + * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are + * defined, we check the features which are set according to the device tree. + * Otherwise, we answer true or false directly + */ +static inline bool has_ftr_sec1(struct talitos_private *priv) +{ +#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2) + return priv->features & TALITOS_FTR_SEC1 ? 
true : false; +#elif defined(CONFIG_CRYPTO_DEV_TALITOS1) + return true; +#else + return false; +#endif +} /* * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register */ +#define ISR1_FORMAT(x) (((x) << 28) | ((x) << 16)) +#define ISR2_FORMAT(x) (((x) << 4) | (x)) + /* global register offset addresses */ #define TALITOS_MCR 0x1030 /* master control register */ #define TALITOS_MCR_RCA0 (1 << 15) /* remap channel 0 */ #define TALITOS_MCR_RCA1 (1 << 14) /* remap channel 1 */ #define TALITOS_MCR_RCA2 (1 << 13) /* remap channel 2 */ #define TALITOS_MCR_RCA3 (1 << 12) /* remap channel 3 */ -#define TALITOS_MCR_SWR 0x1 /* s/w reset */ +#define TALITOS1_MCR_SWR 0x1000000 /* s/w reset */ +#define TALITOS2_MCR_SWR 0x1 /* s/w reset */ #define TALITOS_MCR_LO 0x1034 #define TALITOS_IMR 0x1008 /* interrupt mask register */ -#define TALITOS_IMR_INIT 0x100ff /* enable channel IRQs */ -#define TALITOS_IMR_DONE 0x00055 /* done IRQs */ +/* enable channel IRQs */ +#define TALITOS1_IMR_INIT ISR1_FORMAT(0xf) +#define TALITOS1_IMR_DONE ISR1_FORMAT(0x5) /* done IRQs */ +/* enable channel IRQs */ +#define TALITOS2_IMR_INIT (ISR2_FORMAT(0xf) | 0x10000) +#define TALITOS2_IMR_DONE ISR1_FORMAT(0x5) /* done IRQs */ #define TALITOS_IMR_LO 0x100C -#define TALITOS_IMR_LO_INIT 0x20000 /* allow RNGU error IRQs */ +#define TALITOS1_IMR_LO_INIT 0x2000000 /* allow RNGU error IRQs */ +#define TALITOS2_IMR_LO_INIT 0x20000 /* allow RNGU error IRQs */ #define TALITOS_ISR 0x1010 /* interrupt status register */ -#define TALITOS_ISR_4CHERR 0xaa /* 4 channel errors mask */ -#define TALITOS_ISR_4CHDONE 0x55 /* 4 channel done mask */ -#define TALITOS_ISR_CH_0_2_ERR 0x22 /* channels 0, 2 errors mask */ -#define TALITOS_ISR_CH_0_2_DONE 0x11 /* channels 0, 2 done mask */ -#define TALITOS_ISR_CH_1_3_ERR 0x88 /* channels 1, 3 errors mask */ -#define TALITOS_ISR_CH_1_3_DONE 0x44 /* channels 1, 3 done mask */ +#define TALITOS1_ISR_4CHERR ISR1_FORMAT(0xa) /* 4 ch errors mask */ +#define 
TALITOS1_ISR_4CHDONE ISR1_FORMAT(0x5) /* 4 ch done mask */ +#define TALITOS1_ISR_TEA_ERR 0x00000040 +#define TALITOS2_ISR_4CHERR ISR2_FORMAT(0xa) /* 4 ch errors mask */ +#define TALITOS2_ISR_4CHDONE ISR2_FORMAT(0x5) /* 4 ch done mask */ +#define TALITOS2_ISR_CH_0_2_ERR ISR2_FORMAT(0x2) /* ch 0, 2 err mask */ +#define TALITOS2_ISR_CH_0_2_DONE ISR2_FORMAT(0x1) /* ch 0, 2 done mask */ +#define TALITOS2_ISR_CH_1_3_ERR ISR2_FORMAT(0x8) /* ch 1, 3 err mask */ +#define TALITOS2_ISR_CH_1_3_DONE ISR2_FORMAT(0x4) /* ch 1, 3 done mask */ #define TALITOS_ISR_LO 0x1014 #define TALITOS_ICR 0x1018 /* interrupt clear register */ #define TALITOS_ICR_LO 0x101C /* channel register address stride */ #define TALITOS_CH_BASE_OFFSET 0x1000 /* default channel map base */ -#define TALITOS_CH_STRIDE 0x100 +#define TALITOS1_CH_STRIDE 0x1000 +#define TALITOS2_CH_STRIDE 0x100 /* channel configuration register */ #define TALITOS_CCCR 0x8 -#define TALITOS_CCCR_CONT 0x2 /* channel continue */ -#define TALITOS_CCCR_RESET 0x1 /* channel reset */ +#define TALITOS2_CCCR_CONT 0x2 /* channel continue on SEC2 */ +#define TALITOS2_CCCR_RESET 0x1 /* channel reset on SEC2 */ #define TALITOS_CCCR_LO 0xc #define TALITOS_CCCR_LO_IWSE 0x80 /* chan. ICCR writeback enab. */ #define TALITOS_CCCR_LO_EAE 0x20 /* extended address enable */ #define TALITOS_CCCR_LO_CDWE 0x10 /* chan. done writeback enab. 
*/ #define TALITOS_CCCR_LO_NT 0x4 /* notification type */ #define TALITOS_CCCR_LO_CDIE 0x2 /* channel done IRQ enable */ +#define TALITOS1_CCCR_LO_RESET 0x1 /* channel reset on SEC1 */ /* CCPSR: channel pointer status register */ #define TALITOS_CCPSR 0x10 @@ -224,37 +278,48 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_SCATTER 0xe0 #define TALITOS_SCATTER_LO 0xe4 +/* execution unit registers base */ +#define TALITOS2_DEU 0x2000 +#define TALITOS2_AESU 0x4000 +#define TALITOS2_MDEU 0x6000 +#define TALITOS2_AFEU 0x8000 +#define TALITOS2_RNGU 0xa000 +#define TALITOS2_PKEU 0xc000 +#define TALITOS2_KEU 0xe000 +#define TALITOS2_CRCU 0xf000 + +#define TALITOS12_AESU 0x4000 +#define TALITOS12_DEU 0x5000 +#define TALITOS12_MDEU 0x6000 + +#define TALITOS10_AFEU 0x8000 +#define TALITOS10_DEU 0xa000 +#define TALITOS10_MDEU 0xc000 +#define TALITOS10_RNGU 0xe000 +#define TALITOS10_PKEU 0x10000 +#define TALITOS10_AESU 0x12000 + /* execution unit interrupt status registers */ -#define TALITOS_DEUISR 0x2030 /* DES unit */ -#define TALITOS_DEUISR_LO 0x2034 -#define TALITOS_AESUISR 0x4030 /* AES unit */ -#define TALITOS_AESUISR_LO 0x4034 -#define TALITOS_MDEUISR 0x6030 /* message digest unit */ -#define TALITOS_MDEUISR_LO 0x6034 -#define TALITOS_MDEUICR 0x6038 /* interrupt control */ -#define TALITOS_MDEUICR_LO 0x603c +#define TALITOS_EUDSR 0x10 /* data size */ +#define TALITOS_EUDSR_LO 0x14 +#define TALITOS_EURCR 0x18 /* reset control*/ +#define TALITOS_EURCR_LO 0x1c +#define TALITOS_EUSR 0x28 /* rng status */ +#define TALITOS_EUSR_LO 0x2c +#define TALITOS_EUISR 0x30 +#define TALITOS_EUISR_LO 0x34 +#define TALITOS_EUICR 0x38 /* int. 
control */ +#define TALITOS_EUICR_LO 0x3c +#define TALITOS_EU_FIFO 0x800 /* output FIFO */ +#define TALITOS_EU_FIFO_LO 0x804 /* output FIFO */ +/* DES unit */ +#define TALITOS1_DEUICR_KPE 0x00200000 /* Key Parity Error */ +/* message digest unit */ #define TALITOS_MDEUICR_LO_ICE 0x4000 /* integrity check IRQ enable */ -#define TALITOS_AFEUISR 0x8030 /* arc4 unit */ -#define TALITOS_AFEUISR_LO 0x8034 -#define TALITOS_RNGUISR 0xa030 /* random number unit */ -#define TALITOS_RNGUISR_LO 0xa034 -#define TALITOS_RNGUSR 0xa028 /* rng status */ -#define TALITOS_RNGUSR_LO 0xa02c +/* random number unit */ #define TALITOS_RNGUSR_LO_RD 0x1 /* reset done */ #define TALITOS_RNGUSR_LO_OFL 0xff0000/* output FIFO length */ -#define TALITOS_RNGUDSR 0xa010 /* data size */ -#define TALITOS_RNGUDSR_LO 0xa014 -#define TALITOS_RNGU_FIFO 0xa800 /* output FIFO */ -#define TALITOS_RNGU_FIFO_LO 0xa804 /* output FIFO */ -#define TALITOS_RNGURCR 0xa018 /* reset control */ -#define TALITOS_RNGURCR_LO 0xa01c #define TALITOS_RNGURCR_LO_SR 0x1 /* software reset */ -#define TALITOS_PKEUISR 0xc030 /* public key unit */ -#define TALITOS_PKEUISR_LO 0xc034 -#define TALITOS_KEUISR 0xe030 /* kasumi unit */ -#define TALITOS_KEUISR_LO 0xe034 -#define TALITOS_CRCUISR 0xf030 /* cyclic redundancy check unit*/ -#define TALITOS_CRCUISR_LO 0xf034 #define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256 0x28 #define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512 0x48 diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index c699c6e..d28ab96 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -4,7 +4,7 @@ vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o gha ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le else -TARGET := linux-pcc64 +TARGET := linux-ppc64 endif quiet_cmd_perl = PERL $@ diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index 44d8d5c..4c398dd 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ 
-82,7 +82,7 @@ module_init(p8_init); module_exit(p8_exit); MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); -MODULE_DESCRIPTION("IBM VMX cryptogaphic acceleration instructions support on Power 8"); +MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions support on Power 8"); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); diff --git a/include/crypto/aead.h b/include/crypto/aead.h index 94b19be..94141dc 100644 --- a/include/crypto/aead.h +++ b/include/crypto/aead.h @@ -18,6 +18,64 @@ #include <linux/slab.h> /** + * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API + * + * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD + * (listed as type "aead" in /proc/crypto) + * + * The most prominent examples for this type of encryption are GCM and CCM. + * However, the kernel supports other types of AEAD ciphers which are defined + * with the following cipher string: + * + * authenc(keyed message digest, block cipher) + * + * For example: authenc(hmac(sha256), cbc(aes)) + * + * The example code provided for the asynchronous block cipher operation + * applies here as well. Naturally all *ablkcipher* symbols must be exchanged + * for the *aead* counterparts discussed in the following. In addition, for the AEAD + * operation, the aead_request_set_assoc function must be used to set the + * pointer to the associated data memory location before performing the + * encryption or decryption operation. In case of an encryption, the associated + * data memory is filled during the encryption operation. For decryption, the + * associated data memory must contain data that is used to verify the integrity + * of the decrypted data. Another deviation from the asynchronous block cipher + * operation is that the caller should explicitly check for -EBADMSG of the + * crypto_aead_decrypt. That error indicates an authentication error, i.e. + * a breach in the integrity of the message.
In essence, that -EBADMSG error + * code is the key bonus an AEAD cipher has over "standard" block chaining + * modes. + */ + +/** + * struct aead_request - AEAD request + * @base: Common attributes for async crypto requests + * @assoclen: Length in bytes of associated data for authentication + * @cryptlen: Length of data to be encrypted or decrypted + * @iv: Initialisation vector + * @assoc: Associated data + * @src: Source data + * @dst: Destination data + * @__ctx: Start of private context data + */ +struct aead_request { + struct crypto_async_request base; + + bool old; + + unsigned int assoclen; + unsigned int cryptlen; + + u8 *iv; + + struct scatterlist *assoc; + struct scatterlist *src; + struct scatterlist *dst; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +/** * struct aead_givcrypt_request - AEAD request with IV generation * @seq: Sequence number for IV generation * @giv: Space for generated IV @@ -30,6 +88,453 @@ struct aead_givcrypt_request { struct aead_request areq; }; +/** + * struct aead_alg - AEAD cipher definition + * @maxauthsize: Set the maximum authentication tag size supported by the + * transformation. A transformation may support smaller tag sizes. + * As the authentication tag is a message digest to ensure the + * integrity of the encrypted data, a consumer typically wants the + * largest authentication tag possible as defined by this + * variable. + * @setauthsize: Set authentication size for the AEAD transformation. This + * function is used to specify the consumer requested size of the + * authentication tag to be either generated by the transformation + * during encryption or the size of the authentication tag to be + * supplied during the decryption operation. This function is also + * responsible for checking the authentication tag size for + * validity. 
+ * @setkey: see struct ablkcipher_alg + * @encrypt: see struct ablkcipher_alg + * @decrypt: see struct ablkcipher_alg + * @geniv: see struct ablkcipher_alg + * @ivsize: see struct ablkcipher_alg + * + * All fields except @ivsize are mandatory and must be filled. + */ +struct aead_alg { + int (*setkey)(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen); + int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); + int (*encrypt)(struct aead_request *req); + int (*decrypt)(struct aead_request *req); + + const char *geniv; + + unsigned int ivsize; + unsigned int maxauthsize; + + struct crypto_alg base; +}; + +struct crypto_aead { + int (*setkey)(struct crypto_aead *tfm, const u8 *key, + unsigned int keylen); + int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); + int (*encrypt)(struct aead_request *req); + int (*decrypt)(struct aead_request *req); + int (*givencrypt)(struct aead_givcrypt_request *req); + int (*givdecrypt)(struct aead_givcrypt_request *req); + + struct crypto_aead *child; + + unsigned int authsize; + unsigned int reqsize; + + struct crypto_tfm base; +}; + +static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) +{ + return container_of(tfm, struct crypto_aead, base); +} + +/** + * crypto_alloc_aead() - allocate AEAD cipher handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * AEAD cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Allocate a cipher handle for an AEAD. The returned struct + * crypto_aead is the cipher handle that is required for any subsequent + * API invocation for that AEAD. + * + * Return: allocated cipher handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code.
+ */ +struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); + +static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) +{ + return &tfm->base; +} + +/** + * crypto_free_aead() - zeroize and free aead handle + * @tfm: cipher handle to be freed + */ +static inline void crypto_free_aead(struct crypto_aead *tfm) +{ + crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm)); +} + +static inline struct crypto_aead *crypto_aead_crt(struct crypto_aead *tfm) +{ + return tfm; +} + +static inline struct old_aead_alg *crypto_old_aead_alg(struct crypto_aead *tfm) +{ + return &crypto_aead_tfm(tfm)->__crt_alg->cra_aead; +} + +static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) +{ + return container_of(crypto_aead_tfm(tfm)->__crt_alg, + struct aead_alg, base); +} + +static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg) +{ + return alg->base.cra_aead.encrypt ? alg->base.cra_aead.ivsize : + alg->ivsize; +} + +/** + * crypto_aead_ivsize() - obtain IV size + * @tfm: cipher handle + * + * The size of the IV for the aead referenced by the cipher handle is + * returned. This IV size may be zero if the cipher does not need an IV. + * + * Return: IV size in bytes + */ +static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) +{ + return crypto_aead_alg_ivsize(crypto_aead_alg(tfm)); +} + +/** + * crypto_aead_authsize() - obtain maximum authentication data size + * @tfm: cipher handle + * + * The maximum size of the authentication data for the AEAD cipher referenced + * by the AEAD cipher handle is returned. The authentication data size may be + * zero if the cipher implements a hard-coded maximum. + * + * The authentication data may also be known as "tag value". 
+ * + * Return: authentication data size / tag size in bytes + */ +static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) +{ + return tfm->authsize; +} + +/** + * crypto_aead_blocksize() - obtain block size of cipher + * @tfm: cipher handle + * + * The block size for the AEAD referenced with the cipher handle is returned. + * The caller may use that information to allocate appropriate memory for the + * data returned by the encryption or decryption operation + * + * Return: block size of cipher + */ +static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); +} + +static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); +} + +static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) +{ + return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); +} + +static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) +{ + crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); +} + +static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); +} + +/** + * crypto_aead_setkey() - set key for cipher + * @tfm: cipher handle + * @key: buffer holding the key + * @keylen: length of the key in bytes + * + * The caller provided key is set for the AEAD referenced by the cipher + * handle. + * + * Note, the key length determines the cipher type. Many block ciphers implement + * different cipher modes depending on the key size, such as AES-128 vs AES-192 + * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 + * is performed. 
+ * + * Return: 0 if the setting of the key was successful; < 0 if an error occurred + */ +int crypto_aead_setkey(struct crypto_aead *tfm, + const u8 *key, unsigned int keylen); + +/** + * crypto_aead_setauthsize() - set authentication data size + * @tfm: cipher handle + * @authsize: size of the authentication data / tag in bytes + * + * Set the authentication data size / tag size. AEAD requires an authentication + * tag (or MAC) in addition to the associated data. + * + * Return: 0 if the setting of the authentication data size was successful; < 0 if an error occurred + */ +int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); + +static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) +{ + return __crypto_aead_cast(req->base.tfm); +} + +/** + * crypto_aead_encrypt() - encrypt plaintext + * @req: reference to the aead_request handle that holds all information + * needed to perform the cipher operation + * + * Encrypt plaintext data using the aead_request handle. That data structure + * and how it is filled with data is discussed with the aead_request_* + * functions. + * + * IMPORTANT NOTE The encryption operation creates the authentication data / + * tag. That data is concatenated with the created ciphertext. + * The ciphertext memory size is therefore the given number of + * block cipher blocks + the size defined by the + * crypto_aead_setauthsize invocation. The caller must ensure + * that sufficient memory is available for the ciphertext and + * the authentication tag. + * + * Return: 0 if the cipher operation was successful; < 0 if an error occurred + */ +static inline int crypto_aead_encrypt(struct aead_request *req) +{ + return crypto_aead_reqtfm(req)->encrypt(req); +} + +/** + * crypto_aead_decrypt() - decrypt ciphertext + * @req: reference to the aead_request handle that holds all information + * needed to perform the cipher operation + * + * Decrypt ciphertext data using the aead_request handle.
That data structure + * and how it is filled with data is discussed with the aead_request_* + * functions. + * + * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the + * authentication data / tag. That authentication data / tag + * must have the size defined by the crypto_aead_setauthsize + * invocation. + * + * + * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD + * cipher operation performs the authentication of the data during the + * decryption operation. Therefore, the function returns this error if + * the authentication of the ciphertext was unsuccessful (i.e. the + * integrity of the ciphertext or the associated data was violated); + * < 0 if an error occurred. + */ +static inline int crypto_aead_decrypt(struct aead_request *req) +{ + if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req))) + return -EINVAL; + + return crypto_aead_reqtfm(req)->decrypt(req); +} + +/** + * DOC: Asynchronous AEAD Request Handle + * + * The aead_request data structure contains all pointers to data required for + * the AEAD cipher operation. This includes the cipher handle (which can be + * used by multiple aead_request instances), pointer to plaintext and + * ciphertext, asynchronous callback function, etc. It acts as a handle to the + * aead_request_* API calls in a similar way as AEAD handle to the + * crypto_aead_* API calls. + */ + +/** + * crypto_aead_reqsize() - obtain size of the request data structure + * @tfm: cipher handle + * + * Return: number of bytes + */ +unsigned int crypto_aead_reqsize(struct crypto_aead *tfm); + +/** + * aead_request_set_tfm() - update cipher handle reference in request + * @req: request handle to be modified + * @tfm: cipher handle that shall be added to the request handle + * + * Allow the caller to replace the existing aead handle in the request + * data structure with a different one. 
+ */ +static inline void aead_request_set_tfm(struct aead_request *req, + struct crypto_aead *tfm) +{ + req->base.tfm = crypto_aead_tfm(tfm->child); +} + +/** + * aead_request_alloc() - allocate request data structure + * @tfm: cipher handle to be registered with the request + * @gfp: memory allocation flag that is handed to kmalloc by the API call. + * + * Allocate the request data structure that must be used with the AEAD + * encrypt and decrypt API calls. During the allocation, the provided aead + * handle is registered in the request data structure. + * + * Return: allocated request handle in case of success, or NULL if the + * allocation fails (the kmalloc result is returned as-is). + */ +static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, + gfp_t gfp) +{ + struct aead_request *req; + + req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); + + if (likely(req)) + aead_request_set_tfm(req, tfm); + + return req; +} + +/** + * aead_request_free() - zeroize and free request data structure + * @req: request data structure cipher handle to be freed + */ +static inline void aead_request_free(struct aead_request *req) +{ + kzfree(req); +} + +/** + * aead_request_set_callback() - set asynchronous callback function + * @req: request handle + * @flags: specify zero or an ORing of the flags + * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and + * increase the wait queue beyond the initial maximum size; + * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep + * @compl: callback function pointer to be registered with the request handle + * @data: The data pointer refers to memory that is not used by the kernel + * crypto API, but provided to the callback function for it to use. Here, + * the caller can provide a reference to memory the callback function can + * operate on.
As the callback function is invoked asynchronously to the + * related functionality, it may need to access data structures of the + * related functionality which can be referenced using this pointer. The + * callback function can access the memory via the "data" field in the + * crypto_async_request data structure provided to the callback function. + * + * Setting the callback function that is triggered once the cipher operation + * completes + * + * The callback function is registered with the aead_request handle and + * must comply with the following template + * + * void callback_function(struct crypto_async_request *req, int error) + */ +static inline void aead_request_set_callback(struct aead_request *req, + u32 flags, + crypto_completion_t compl, + void *data) +{ + req->base.complete = compl; + req->base.data = data; + req->base.flags = flags; +} + +/** + * aead_request_set_crypt - set data buffers + * @req: request handle + * @src: source scatter / gather list + * @dst: destination scatter / gather list + * @cryptlen: number of bytes to process from @src + * @iv: IV for the cipher operation which must comply with the IV size defined + * by crypto_aead_ivsize() + * + * Setting the source data and destination data scatter / gather lists. + * + * For encryption, the source is treated as the plaintext and the + * destination is the ciphertext. For a decryption operation, the use is + * reversed - the source is the ciphertext and the destination is the plaintext. + * + * For both src/dst the layout is associated data, skipped data, + * plain/cipher text, authentication tag. + * + * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption, + * the caller must concatenate the ciphertext followed by the + * authentication tag and provide the entire data stream to the + * decryption operation (i.e. the data length used for the + * initialization of the scatterlist and the data length for the + * decryption operation is identical). 
For encryption, however, + * the authentication tag is created while encrypting the data. + * The destination buffer must hold sufficient space for the + * ciphertext and the authentication tag while the encryption + * invocation must only point to the plaintext data size. The + * following code snippet illustrates the memory usage + * buffer = kmalloc(ptbuflen + (enc ? authsize : 0)); + * sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0)); + * aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv); + */ +static inline void aead_request_set_crypt(struct aead_request *req, + struct scatterlist *src, + struct scatterlist *dst, + unsigned int cryptlen, u8 *iv) +{ + req->src = src; + req->dst = dst; + req->cryptlen = cryptlen; + req->iv = iv; +} + +/** + * aead_request_set_assoc() - set the associated data scatter / gather list + * @req: request handle + * @assoc: associated data scatter / gather list + * @assoclen: number of bytes to process from @assoc + * + * Obsolete, do not use. + */ +static inline void aead_request_set_assoc(struct aead_request *req, + struct scatterlist *assoc, + unsigned int assoclen) +{ + req->assoc = assoc; + req->assoclen = assoclen; + req->old = true; +} + +/** + * aead_request_set_ad - set associated data information + * @req: request handle + * @assoclen: number of bytes in associated data + * + * Setting the AD information. This function sets the length of + * the associated data and the number of bytes to skip after it to + * access the plain/cipher text. 
+ */ +static inline void aead_request_set_ad(struct aead_request *req, + unsigned int assoclen) +{ + req->assoclen = assoclen; + req->old = false; +} + static inline struct crypto_aead *aead_givcrypt_reqtfm( struct aead_givcrypt_request *req) { @@ -38,14 +543,12 @@ static inline struct crypto_aead *aead_givcrypt_reqtfm( static inline int crypto_aead_givencrypt(struct aead_givcrypt_request *req) { - struct aead_tfm *crt = crypto_aead_crt(aead_givcrypt_reqtfm(req)); - return crt->givencrypt(req); + return aead_givcrypt_reqtfm(req)->givencrypt(req); }; static inline int crypto_aead_givdecrypt(struct aead_givcrypt_request *req) { - struct aead_tfm *crt = crypto_aead_crt(aead_givcrypt_reqtfm(req)); - return crt->givdecrypt(req); + return aead_givcrypt_reqtfm(req)->givdecrypt(req); }; static inline void aead_givcrypt_set_tfm(struct aead_givcrypt_request *req, diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 0ecb768..d4ebf6e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> +struct crypto_aead; struct module; struct rtattr; struct seq_file; @@ -126,7 +127,6 @@ struct ablkcipher_walk { }; extern const struct crypto_type crypto_ablkcipher_type; -extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_blkcipher_type; void crypto_mod_put(struct crypto_alg *alg); @@ -144,6 +144,8 @@ int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, struct crypto_instance *inst, const struct crypto_type *frontend); +int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name, + u32 type, u32 mask); void crypto_drop_spawn(struct crypto_spawn *spawn); struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, @@ -239,22 +241,6 @@ static inline void *crypto_ablkcipher_ctx_aligned(struct crypto_ablkcipher *tfm) return 
crypto_tfm_ctx_aligned(&tfm->base); } -static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm) -{ - return &crypto_aead_tfm(tfm)->__crt_alg->cra_aead; -} - -static inline void *crypto_aead_ctx(struct crypto_aead *tfm) -{ - return crypto_tfm_ctx(&tfm->base); -} - -static inline struct crypto_instance *crypto_aead_alg_instance( - struct crypto_aead *aead) -{ - return crypto_tfm_alg_instance(&aead->base); -} - static inline struct crypto_blkcipher *crypto_spawn_blkcipher( struct crypto_spawn *spawn) { @@ -363,21 +349,6 @@ static inline int ablkcipher_tfm_in_queue(struct crypto_queue *queue, return crypto_tfm_in_queue(queue, crypto_ablkcipher_tfm(tfm)); } -static inline void *aead_request_ctx(struct aead_request *req) -{ - return req->__ctx; -} - -static inline void aead_request_complete(struct aead_request *req, int err) -{ - req->base.complete(&req->base, err); -} - -static inline u32 aead_request_flags(struct aead_request *req) -{ - return req->base.flags; -} - static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb, u32 type, u32 mask) { diff --git a/include/crypto/compress.h b/include/crypto/compress.h index 86163ef..5b67af8 100644 --- a/include/crypto/compress.h +++ b/include/crypto/compress.h @@ -55,14 +55,14 @@ struct crypto_pcomp { }; struct pcomp_alg { - int (*compress_setup)(struct crypto_pcomp *tfm, void *params, + int (*compress_setup)(struct crypto_pcomp *tfm, const void *params, unsigned int len); int (*compress_init)(struct crypto_pcomp *tfm); int (*compress_update)(struct crypto_pcomp *tfm, struct comp_request *req); int (*compress_final)(struct crypto_pcomp *tfm, struct comp_request *req); - int (*decompress_setup)(struct crypto_pcomp *tfm, void *params, + int (*decompress_setup)(struct crypto_pcomp *tfm, const void *params, unsigned int len); int (*decompress_init)(struct crypto_pcomp *tfm); int (*decompress_update)(struct crypto_pcomp *tfm, @@ -97,7 +97,7 @@ static inline struct pcomp_alg *crypto_pcomp_alg(struct 
crypto_pcomp *tfm) } static inline int crypto_compress_setup(struct crypto_pcomp *tfm, - void *params, unsigned int len) + const void *params, unsigned int len) { return crypto_pcomp_alg(tfm)->compress_setup(tfm, params, len); } @@ -120,7 +120,7 @@ static inline int crypto_compress_final(struct crypto_pcomp *tfm, } static inline int crypto_decompress_setup(struct crypto_pcomp *tfm, - void *params, unsigned int len) + const void *params, unsigned int len) { return crypto_pcomp_alg(tfm)->decompress_setup(tfm, params, len); } diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index ba98918..1547f54 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -14,6 +14,7 @@ #include <linux/crypto.h> #include <linux/kernel.h> +#include <crypto/aead.h> #include <crypto/hash.h> struct cryptd_ablkcipher { diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h index 5186f75..c3f208d 100644 --- a/include/crypto/drbg.h +++ b/include/crypto/drbg.h @@ -49,8 +49,9 @@ #include <crypto/internal/rng.h> #include <crypto/rng.h> #include <linux/fips.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/list.h> +#include <linux/workqueue.h> /* * Concatenation Helper and string operation helper @@ -104,7 +105,7 @@ struct drbg_test_data { }; struct drbg_state { - spinlock_t drbg_lock; /* lock around DRBG */ + struct mutex drbg_mutex; /* lock around DRBG */ unsigned char *V; /* internal state 10.1.1.1 1a) */ /* hash: static value 10.1.1.1 1b) hmac / ctr: key */ unsigned char *C; @@ -119,9 +120,13 @@ struct drbg_state { bool fips_primed; /* Continuous test primed? 
*/ unsigned char *prev; /* FIPS 140-2 continuous test value */ #endif + struct work_struct seed_work; /* asynchronous seeding support */ + u8 *seed_buf; /* buffer holding the seed */ + size_t seed_buf_len; + struct crypto_rng *jent; const struct drbg_state_ops *d_ops; const struct drbg_core *core; - struct drbg_test_data *test_data; + struct drbg_string test_data; }; static inline __u8 drbg_statelen(struct drbg_state *drbg) @@ -177,19 +182,8 @@ static inline size_t drbg_max_requests(struct drbg_state *drbg) } /* - * kernel crypto API input data structure for DRBG generate in case dlen - * is set to 0 - */ -struct drbg_gen { - unsigned char *outbuf; /* output buffer for random numbers */ - unsigned int outlen; /* size of output buffer */ - struct drbg_string *addtl; /* additional information string */ - struct drbg_test_data *test_data; /* test data */ -}; - -/* * This is a wrapper to the kernel crypto API function of - * crypto_rng_get_bytes() to allow the caller to provide additional data. + * crypto_rng_generate() to allow the caller to provide additional data. 
* * @drng DRBG handle -- see crypto_rng_get_bytes * @outbuf output buffer -- see crypto_rng_get_bytes @@ -204,21 +198,15 @@ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng, unsigned char *outbuf, unsigned int outlen, struct drbg_string *addtl) { - int ret; - struct drbg_gen genbuf; - genbuf.outbuf = outbuf; - genbuf.outlen = outlen; - genbuf.addtl = addtl; - genbuf.test_data = NULL; - ret = crypto_rng_get_bytes(drng, (u8 *)&genbuf, 0); - return ret; + return crypto_rng_generate(drng, addtl->buf, addtl->len, + outbuf, outlen); } /* * TEST code * * This is a wrapper to the kernel crypto API function of - * crypto_rng_get_bytes() to allow the caller to provide additional data and + * crypto_rng_generate() to allow the caller to provide additional data and * allow furnishing of test_data * * @drng DRBG handle -- see crypto_rng_get_bytes @@ -236,14 +224,10 @@ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng, struct drbg_string *addtl, struct drbg_test_data *test_data) { - int ret; - struct drbg_gen genbuf; - genbuf.outbuf = outbuf; - genbuf.outlen = outlen; - genbuf.addtl = addtl; - genbuf.test_data = test_data; - ret = crypto_rng_get_bytes(drng, (u8 *)&genbuf, 0); - return ret; + crypto_rng_set_entropy(drng, test_data->testentropy->buf, + test_data->testentropy->len); + return crypto_rng_generate(drng, addtl->buf, addtl->len, + outbuf, outlen); } /* @@ -264,14 +248,9 @@ static inline int crypto_drbg_reset_test(struct crypto_rng *drng, struct drbg_string *pers, struct drbg_test_data *test_data) { - int ret; - struct drbg_gen genbuf; - genbuf.outbuf = NULL; - genbuf.outlen = 0; - genbuf.addtl = pers; - genbuf.test_data = test_data; - ret = crypto_rng_reset(drng, (u8 *)&genbuf, 0); - return ret; + crypto_rng_set_entropy(drng, test_data->testentropy->buf, + test_data->testentropy->len); + return crypto_rng_reset(drng, pers->buf, pers->len); } /* DRBG type flags */ diff --git a/include/crypto/internal/aead.h 
b/include/crypto/internal/aead.h index 2eba340..4137330 100644 --- a/include/crypto/internal/aead.h +++ b/include/crypto/internal/aead.h @@ -19,12 +19,59 @@ struct rtattr; +struct aead_instance { + struct aead_alg alg; +}; + struct crypto_aead_spawn { struct crypto_spawn base; }; +extern const struct crypto_type crypto_aead_type; extern const struct crypto_type crypto_nivaead_type; +static inline void *crypto_aead_ctx(struct crypto_aead *tfm) +{ + return crypto_tfm_ctx(&tfm->base); +} + +static inline struct crypto_instance *crypto_aead_alg_instance( + struct crypto_aead *aead) +{ + return crypto_tfm_alg_instance(&aead->base); +} + +static inline struct crypto_instance *aead_crypto_instance( + struct aead_instance *inst) +{ + return container_of(&inst->alg.base, struct crypto_instance, alg); +} + +static inline struct aead_instance *aead_instance(struct crypto_instance *inst) +{ + return container_of(&inst->alg, struct aead_instance, alg.base); +} + +static inline void *aead_instance_ctx(struct aead_instance *inst) +{ + return crypto_instance_ctx(aead_crypto_instance(inst)); +} + +static inline void *aead_request_ctx(struct aead_request *req) +{ + return req->__ctx; +} + +static inline void aead_request_complete(struct aead_request *req, int err) +{ + req->base.complete(&req->base, err); +} + +static inline u32 aead_request_flags(struct aead_request *req) +{ + return req->base.flags; +} + static inline void crypto_set_aead_spawn( struct crypto_aead_spawn *spawn, struct crypto_instance *inst) { @@ -47,24 +94,27 @@ static inline struct crypto_alg *crypto_aead_spawn_alg( return spawn->base.alg; } +static inline struct aead_alg *crypto_spawn_aead_alg( + struct crypto_aead_spawn *spawn) +{ + return container_of(spawn->base.alg, struct aead_alg, base); +} + static inline struct crypto_aead *crypto_spawn_aead( struct crypto_aead_spawn *spawn) { - return __crypto_aead_cast( - crypto_spawn_tfm(&spawn->base, CRYPTO_ALG_TYPE_AEAD, - CRYPTO_ALG_TYPE_MASK)); + return 
crypto_spawn_tfm2(&spawn->base); } -struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl, - struct rtattr **tb, u32 type, - u32 mask); -void aead_geniv_free(struct crypto_instance *inst); +struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl, + struct rtattr **tb, u32 type, u32 mask); +void aead_geniv_free(struct aead_instance *inst); int aead_geniv_init(struct crypto_tfm *tfm); void aead_geniv_exit(struct crypto_tfm *tfm); static inline struct crypto_aead *aead_geniv_base(struct crypto_aead *geniv) { - return crypto_aead_crt(geniv)->base; + return geniv->child; } static inline void *aead_givcrypt_reqctx(struct aead_givcrypt_request *req) @@ -78,5 +128,27 @@ static inline void aead_givcrypt_complete(struct aead_givcrypt_request *req, aead_request_complete(&req->areq, err); } +static inline void crypto_aead_set_reqsize(struct crypto_aead *aead, + unsigned int reqsize) +{ + crypto_aead_crt(aead)->reqsize = reqsize; +} + +static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg) +{ + return alg->base.cra_aead.encrypt ? 
alg->base.cra_aead.maxauthsize : + alg->maxauthsize; +} + +static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead) +{ + return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead)); +} + +int crypto_register_aead(struct aead_alg *alg); +int crypto_unregister_aead(struct aead_alg *alg); +int aead_register_instance(struct crypto_template *tmpl, + struct aead_instance *inst); + #endif /* _CRYPTO_INTERNAL_AEAD_H */ diff --git a/include/crypto/internal/rng.h b/include/crypto/internal/rng.h index 8969733..263f1a5 100644 --- a/include/crypto/internal/rng.h +++ b/include/crypto/internal/rng.h @@ -2,6 +2,7 @@ * RNG: Random Number Generator algorithms under the crypto API * * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com> + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -16,11 +17,20 @@ #include <crypto/algapi.h> #include <crypto/rng.h> -extern const struct crypto_type crypto_rng_type; +int crypto_register_rng(struct rng_alg *alg); +void crypto_unregister_rng(struct rng_alg *alg); +int crypto_register_rngs(struct rng_alg *algs, int count); +void crypto_unregister_rngs(struct rng_alg *algs, int count); static inline void *crypto_rng_ctx(struct crypto_rng *tfm) { return crypto_tfm_ctx(&tfm->base); } +static inline void crypto_rng_set_entropy(struct crypto_rng *tfm, + const u8 *data, unsigned int len) +{ + crypto_rng_alg(tfm)->set_ent(tfm, data, len); +} + #endif diff --git a/include/crypto/md5.h b/include/crypto/md5.h index 65f299b..146af825 100644 --- a/include/crypto/md5.h +++ b/include/crypto/md5.h @@ -8,6 +8,11 @@ #define MD5_BLOCK_WORDS 16 #define MD5_HASH_WORDS 4 +#define MD5_H0 0x67452301UL +#define MD5_H1 0xefcdab89UL +#define MD5_H2 0x98badcfeUL +#define MD5_H3 0x10325476UL + struct md5_state { u32 hash[MD5_HASH_WORDS]; u32 block[MD5_BLOCK_WORDS]; diff --git 
a/include/crypto/null.h b/include/crypto/null.h index b7c864c..06dc30d 100644 --- a/include/crypto/null.h +++ b/include/crypto/null.h @@ -8,4 +8,7 @@ #define NULL_DIGEST_SIZE 0 #define NULL_IV_SIZE 0 +struct crypto_blkcipher *crypto_get_default_null_skcipher(void); +void crypto_put_default_null_skcipher(void); + #endif diff --git a/include/crypto/rng.h b/include/crypto/rng.h index 6e28ea5..c5d4684 100644 --- a/include/crypto/rng.h +++ b/include/crypto/rng.h @@ -2,6 +2,7 @@ * RNG: Random Number Generator algorithms under the crypto API * * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com> + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -15,6 +16,50 @@ #include <linux/crypto.h> +struct crypto_rng; + +/** + * struct rng_alg - random number generator definition + * + * @generate: The function defined by this variable obtains a + * random number. The random number generator transform + * must generate the random number out of the context + * provided with this call, plus any additional data + * if provided to the call. + * @seed: Seed or reseed the random number generator. With the + * invocation of this function call, the random number + * generator shall become ready for generation. If the + * random number generator requires a seed for setting + * up a new state, the seed must be provided by the + * consumer while invoking this function. The required + * size of the seed is defined with @seedsize . + * @set_ent: Set entropy that would otherwise be obtained from + * entropy source. Internal use only. + * @seedsize: The seed size required for a random number generator + * initialization defined with this variable. Some + * random number generators do not require a seed + * as the seeding is implemented internally without + * the need of support by the consumer. 
In this case, + * the seed size is set to zero. + * @base: Common crypto API algorithm data structure. + */ +struct rng_alg { + int (*generate)(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen); + int (*seed)(struct crypto_rng *tfm, const u8 *seed, unsigned int slen); + void (*set_ent)(struct crypto_rng *tfm, const u8 *data, + unsigned int len); + + unsigned int seedsize; + + struct crypto_alg base; +}; + +struct crypto_rng { + struct crypto_tfm base; +}; + extern struct crypto_rng *crypto_default_rng; int crypto_get_default_rng(void); @@ -27,11 +72,6 @@ void crypto_put_default_rng(void); * CRYPTO_ALG_TYPE_RNG (listed as type "rng" in /proc/crypto) */ -static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_rng *)tfm; -} - /** * crypto_alloc_rng() -- allocate RNG handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the @@ -52,15 +92,7 @@ static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm) * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. 
*/ -static inline struct crypto_rng *crypto_alloc_rng(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_RNG; - mask |= CRYPTO_ALG_TYPE_MASK; - - return __crypto_rng_cast(crypto_alloc_base(alg_name, type, mask)); -} +struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm) { @@ -77,12 +109,8 @@ static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm) */ static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm) { - return &crypto_rng_tfm(tfm)->__crt_alg->cra_rng; -} - -static inline struct rng_tfm *crypto_rng_crt(struct crypto_rng *tfm) -{ - return &crypto_rng_tfm(tfm)->crt_rng; + return container_of(crypto_rng_tfm(tfm)->__crt_alg, + struct rng_alg, base); } /** @@ -91,7 +119,28 @@ static inline struct rng_tfm *crypto_rng_crt(struct crypto_rng *tfm) */ static inline void crypto_free_rng(struct crypto_rng *tfm) { - crypto_free_tfm(crypto_rng_tfm(tfm)); + crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm)); +} + +/** + * crypto_rng_generate() - get random number + * @tfm: cipher handle + * @src: Input buffer holding additional data, may be NULL + * @slen: Length of additional data + * @dst: output buffer holding the random numbers + * @dlen: length of the output buffer + * + * This function fills the caller-allocated buffer with random + * numbers using the random number generator referenced by the + * cipher handle. 
+ * + * Return: 0 function was successful; < 0 if an error occurred + */ +static inline int crypto_rng_generate(struct crypto_rng *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int dlen) +{ + return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen); } /** @@ -108,7 +157,7 @@ static inline void crypto_free_rng(struct crypto_rng *tfm) static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen) { - return crypto_rng_crt(tfm)->rng_gen_random(tfm, rdata, dlen); + return crypto_rng_generate(tfm, NULL, 0, rdata, dlen); } /** @@ -128,11 +177,8 @@ static inline int crypto_rng_get_bytes(struct crypto_rng *tfm, * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ -static inline int crypto_rng_reset(struct crypto_rng *tfm, - u8 *seed, unsigned int slen) -{ - return crypto_rng_crt(tfm)->rng_reset(tfm, seed, slen); -} +int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, + unsigned int slen); /** * crypto_rng_seedsize() - obtain seed size of RNG diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 20e4226..96670e7 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -102,4 +102,8 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg, int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes); +struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2], + struct scatterlist *src, + unsigned int len); + #endif /* _CRYPTO_SCATTERWALK_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 10df5d2..7d290a9 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -138,9 +138,9 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; -struct crypto_rng; struct crypto_tfm; struct crypto_type; +struct aead_request; struct aead_givcrypt_request; struct skcipher_givcrypt_request; @@ -175,32 +175,6 @@ struct ablkcipher_request { void *__ctx[] 
CRYPTO_MINALIGN_ATTR; }; -/** - * struct aead_request - AEAD request - * @base: Common attributes for async crypto requests - * @assoclen: Length in bytes of associated data for authentication - * @cryptlen: Length of data to be encrypted or decrypted - * @iv: Initialisation vector - * @assoc: Associated data - * @src: Source data - * @dst: Destination data - * @__ctx: Start of private context data - */ -struct aead_request { - struct crypto_async_request base; - - unsigned int assoclen; - unsigned int cryptlen; - - u8 *iv; - - struct scatterlist *assoc; - struct scatterlist *src; - struct scatterlist *dst; - - void *__ctx[] CRYPTO_MINALIGN_ATTR; -}; - struct blkcipher_desc { struct crypto_blkcipher *tfm; void *info; @@ -294,7 +268,7 @@ struct ablkcipher_alg { }; /** - * struct aead_alg - AEAD cipher definition + * struct old_aead_alg - AEAD cipher definition * @maxauthsize: Set the maximum authentication tag size supported by the * transformation. A transformation may support smaller tag sizes. * As the authentication tag is a message digest to ensure the @@ -319,7 +293,7 @@ struct ablkcipher_alg { * All fields except @givencrypt , @givdecrypt , @geniv and @ivsize are * mandatory and must be filled. */ -struct aead_alg { +struct old_aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize); @@ -426,40 +400,12 @@ struct compress_alg { unsigned int slen, u8 *dst, unsigned int *dlen); }; -/** - * struct rng_alg - random number generator definition - * @rng_make_random: The function defined by this variable obtains a random - * number. The random number generator transform must generate - * the random number out of the context provided with this - * call. - * @rng_reset: Reset of the random number generator by clearing the entire state. - * With the invocation of this function call, the random number - * generator shall completely reinitialize its state. 
If the random - * number generator requires a seed for setting up a new state, - * the seed must be provided by the consumer while invoking this - * function. The required size of the seed is defined with - * @seedsize . - * @seedsize: The seed size required for a random number generator - * initialization defined with this variable. Some random number - * generators like the SP800-90A DRBG does not require a seed as the - * seeding is implemented internally without the need of support by - * the consumer. In this case, the seed size is set to zero. - */ -struct rng_alg { - int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen); - int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); - - unsigned int seedsize; -}; - #define cra_ablkcipher cra_u.ablkcipher #define cra_aead cra_u.aead #define cra_blkcipher cra_u.blkcipher #define cra_cipher cra_u.cipher #define cra_compress cra_u.compress -#define cra_rng cra_u.rng /** * struct crypto_alg - definition of a cryptograpic cipher algorithm @@ -555,11 +501,10 @@ struct crypto_alg { union { struct ablkcipher_alg ablkcipher; - struct aead_alg aead; + struct old_aead_alg aead; struct blkcipher_alg blkcipher; struct cipher_alg cipher; struct compress_alg compress; - struct rng_alg rng; } cra_u; int (*cra_init)(struct crypto_tfm *tfm); @@ -602,21 +547,6 @@ struct ablkcipher_tfm { unsigned int reqsize; }; -struct aead_tfm { - int (*setkey)(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen); - int (*encrypt)(struct aead_request *req); - int (*decrypt)(struct aead_request *req); - int (*givencrypt)(struct aead_givcrypt_request *req); - int (*givdecrypt)(struct aead_givcrypt_request *req); - - struct crypto_aead *base; - - unsigned int ivsize; - unsigned int authsize; - unsigned int reqsize; -}; - struct blkcipher_tfm { void *iv; int (*setkey)(struct crypto_tfm *tfm, const u8 *key, @@ -655,19 +585,11 @@ struct compress_tfm { u8 *dst, unsigned int *dlen); }; -struct rng_tfm { - 
int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen); - int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen); -}; - #define crt_ablkcipher crt_u.ablkcipher -#define crt_aead crt_u.aead #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash #define crt_compress crt_u.compress -#define crt_rng crt_u.rng struct crypto_tfm { @@ -675,12 +597,10 @@ struct crypto_tfm { union { struct ablkcipher_tfm ablkcipher; - struct aead_tfm aead; struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; struct compress_tfm compress; - struct rng_tfm rng; } crt_u; void (*exit)(struct crypto_tfm *tfm); @@ -694,10 +614,6 @@ struct crypto_ablkcipher { struct crypto_tfm base; }; -struct crypto_aead { - struct crypto_tfm base; -}; - struct crypto_blkcipher { struct crypto_tfm base; }; @@ -714,10 +630,6 @@ struct crypto_hash { struct crypto_tfm base; }; -struct crypto_rng { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1194,400 +1106,6 @@ static inline void ablkcipher_request_set_crypt( } /** - * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API - * - * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD - * (listed as type "aead" in /proc/crypto) - * - * The most prominent examples for this type of encryption is GCM and CCM. - * However, the kernel supports other types of AEAD ciphers which are defined - * with the following cipher string: - * - * authenc(keyed message digest, block cipher) - * - * For example: authenc(hmac(sha256), cbc(aes)) - * - * The example code provided for the asynchronous block cipher operation - * applies here as well. Naturally all *ablkcipher* symbols must be exchanged - * the *aead* pendants discussed in the following. 
In addtion, for the AEAD - * operation, the aead_request_set_assoc function must be used to set the - * pointer to the associated data memory location before performing the - * encryption or decryption operation. In case of an encryption, the associated - * data memory is filled during the encryption operation. For decryption, the - * associated data memory must contain data that is used to verify the integrity - * of the decrypted data. Another deviation from the asynchronous block cipher - * operation is that the caller should explicitly check for -EBADMSG of the - * crypto_aead_decrypt. That error indicates an authentication error, i.e. - * a breach in the integrity of the message. In essence, that -EBADMSG error - * code is the key bonus an AEAD cipher has over "standard" block chaining - * modes. - */ - -static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_aead *)tfm; -} - -/** - * crypto_alloc_aead() - allocate AEAD cipher handle - * @alg_name: is the cra_name / name or cra_driver_name / driver name of the - * AEAD cipher - * @type: specifies the type of the cipher - * @mask: specifies the mask for the cipher - * - * Allocate a cipher handle for an AEAD. The returned struct - * crypto_aead is the cipher handle that is required for any subsequent - * API invocation for that AEAD. - * - * Return: allocated cipher handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. 
- */ -struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask); - -static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm) -{ - return &tfm->base; -} - -/** - * crypto_free_aead() - zeroize and free aead handle - * @tfm: cipher handle to be freed - */ -static inline void crypto_free_aead(struct crypto_aead *tfm) -{ - crypto_free_tfm(crypto_aead_tfm(tfm)); -} - -static inline struct aead_tfm *crypto_aead_crt(struct crypto_aead *tfm) -{ - return &crypto_aead_tfm(tfm)->crt_aead; -} - -/** - * crypto_aead_ivsize() - obtain IV size - * @tfm: cipher handle - * - * The size of the IV for the aead referenced by the cipher handle is - * returned. This IV size may be zero if the cipher does not need an IV. - * - * Return: IV size in bytes - */ -static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->ivsize; -} - -/** - * crypto_aead_authsize() - obtain maximum authentication data size - * @tfm: cipher handle - * - * The maximum size of the authentication data for the AEAD cipher referenced - * by the AEAD cipher handle is returned. The authentication data size may be - * zero if the cipher implements a hard-coded maximum. - * - * The authentication data may also be known as "tag value". - * - * Return: authentication data size / tag size in bytes - */ -static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->authsize; -} - -/** - * crypto_aead_blocksize() - obtain block size of cipher - * @tfm: cipher handle - * - * The block size for the AEAD referenced with the cipher handle is returned. 
- * The caller may use that information to allocate appropriate memory for the - * data returned by the encryption or decryption operation - * - * Return: block size of cipher - */ -static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm) -{ - return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm)); -} - -static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm)); -} - -static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm) -{ - return crypto_tfm_get_flags(crypto_aead_tfm(tfm)); -} - -static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags); -} - -static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags); -} - -/** - * crypto_aead_setkey() - set key for cipher - * @tfm: cipher handle - * @key: buffer holding the key - * @keylen: length of the key in bytes - * - * The caller provided key is set for the AEAD referenced by the cipher - * handle. - * - * Note, the key length determines the cipher type. Many block ciphers implement - * different cipher modes depending on the key size, such as AES-128 vs AES-192 - * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 - * is performed. - * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -static inline int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key, - unsigned int keylen) -{ - struct aead_tfm *crt = crypto_aead_crt(tfm); - - return crt->setkey(crt->base, key, keylen); -} - -/** - * crypto_aead_setauthsize() - set authentication data size - * @tfm: cipher handle - * @authsize: size of the authentication data / tag in bytes - * - * Set the authentication data size / tag size. AEAD requires an authentication - * tag (or MAC) in addition to the associated data. 
- * - * Return: 0 if the setting of the key was successful; < 0 if an error occurred - */ -int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize); - -static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req) -{ - return __crypto_aead_cast(req->base.tfm); -} - -/** - * crypto_aead_encrypt() - encrypt plaintext - * @req: reference to the aead_request handle that holds all information - * needed to perform the cipher operation - * - * Encrypt plaintext data using the aead_request handle. That data structure - * and how it is filled with data is discussed with the aead_request_* - * functions. - * - * IMPORTANT NOTE The encryption operation creates the authentication data / - * tag. That data is concatenated with the created ciphertext. - * The ciphertext memory size is therefore the given number of - * block cipher blocks + the size defined by the - * crypto_aead_setauthsize invocation. The caller must ensure - * that sufficient memory is available for the ciphertext and - * the authentication tag. - * - * Return: 0 if the cipher operation was successful; < 0 if an error occurred - */ -static inline int crypto_aead_encrypt(struct aead_request *req) -{ - return crypto_aead_crt(crypto_aead_reqtfm(req))->encrypt(req); -} - -/** - * crypto_aead_decrypt() - decrypt ciphertext - * @req: reference to the ablkcipher_request handle that holds all information - * needed to perform the cipher operation - * - * Decrypt ciphertext data using the aead_request handle. That data structure - * and how it is filled with data is discussed with the aead_request_* - * functions. - * - * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the - * authentication data / tag. That authentication data / tag - * must have the size defined by the crypto_aead_setauthsize - * invocation. 
- * - * - * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD - * cipher operation performs the authentication of the data during the - * decryption operation. Therefore, the function returns this error if - * the authentication of the ciphertext was unsuccessful (i.e. the - * integrity of the ciphertext or the associated data was violated); - * < 0 if an error occurred. - */ -static inline int crypto_aead_decrypt(struct aead_request *req) -{ - if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req))) - return -EINVAL; - - return crypto_aead_crt(crypto_aead_reqtfm(req))->decrypt(req); -} - -/** - * DOC: Asynchronous AEAD Request Handle - * - * The aead_request data structure contains all pointers to data required for - * the AEAD cipher operation. This includes the cipher handle (which can be - * used by multiple aead_request instances), pointer to plaintext and - * ciphertext, asynchronous callback function, etc. It acts as a handle to the - * aead_request_* API calls in a similar way as AEAD handle to the - * crypto_aead_* API calls. - */ - -/** - * crypto_aead_reqsize() - obtain size of the request data structure - * @tfm: cipher handle - * - * Return: number of bytes - */ -static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm) -{ - return crypto_aead_crt(tfm)->reqsize; -} - -/** - * aead_request_set_tfm() - update cipher handle reference in request - * @req: request handle to be modified - * @tfm: cipher handle that shall be added to the request handle - * - * Allow the caller to replace the existing aead handle in the request - * data structure with a different one. 
- */ -static inline void aead_request_set_tfm(struct aead_request *req, - struct crypto_aead *tfm) -{ - req->base.tfm = crypto_aead_tfm(crypto_aead_crt(tfm)->base); -} - -/** - * aead_request_alloc() - allocate request data structure - * @tfm: cipher handle to be registered with the request - * @gfp: memory allocation flag that is handed to kmalloc by the API call. - * - * Allocate the request data structure that must be used with the AEAD - * encrypt and decrypt API calls. During the allocation, the provided aead - * handle is registered in the request data structure. - * - * Return: allocated request handle in case of success; IS_ERR() is true in case - * of an error, PTR_ERR() returns the error code. - */ -static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, - gfp_t gfp) -{ - struct aead_request *req; - - req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp); - - if (likely(req)) - aead_request_set_tfm(req, tfm); - - return req; -} - -/** - * aead_request_free() - zeroize and free request data structure - * @req: request data structure cipher handle to be freed - */ -static inline void aead_request_free(struct aead_request *req) -{ - kzfree(req); -} - -/** - * aead_request_set_callback() - set asynchronous callback function - * @req: request handle - * @flags: specify zero or an ORing of the flags - * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and - * increase the wait queue beyond the initial maximum size; - * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep - * @compl: callback function pointer to be registered with the request handle - * @data: The data pointer refers to memory that is not used by the kernel - * crypto API, but provided to the callback function for it to use. Here, - * the caller can provide a reference to memory the callback function can - * operate on. 
As the callback function is invoked asynchronously to the - * related functionality, it may need to access data structures of the - * related functionality which can be referenced using this pointer. The - * callback function can access the memory via the "data" field in the - * crypto_async_request data structure provided to the callback function. - * - * Setting the callback function that is triggered once the cipher operation - * completes - * - * The callback function is registered with the aead_request handle and - * must comply with the following template - * - * void callback_function(struct crypto_async_request *req, int error) - */ -static inline void aead_request_set_callback(struct aead_request *req, - u32 flags, - crypto_completion_t compl, - void *data) -{ - req->base.complete = compl; - req->base.data = data; - req->base.flags = flags; -} - -/** - * aead_request_set_crypt - set data buffers - * @req: request handle - * @src: source scatter / gather list - * @dst: destination scatter / gather list - * @cryptlen: number of bytes to process from @src - * @iv: IV for the cipher operation which must comply with the IV size defined - * by crypto_aead_ivsize() - * - * Setting the source data and destination data scatter / gather lists. - * - * For encryption, the source is treated as the plaintext and the - * destination is the ciphertext. For a decryption operation, the use is - * reversed - the source is the ciphertext and the destination is the plaintext. - * - * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption, - * the caller must concatenate the ciphertext followed by the - * authentication tag and provide the entire data stream to the - * decryption operation (i.e. the data length used for the - * initialization of the scatterlist and the data length for the - * decryption operation is identical). For encryption, however, - * the authentication tag is created while encrypting the data. 
- * The destination buffer must hold sufficient space for the - * ciphertext and the authentication tag while the encryption - * invocation must only point to the plaintext data size. The - * following code snippet illustrates the memory usage - * buffer = kmalloc(ptbuflen + (enc ? authsize : 0)); - * sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0)); - * aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv); - */ -static inline void aead_request_set_crypt(struct aead_request *req, - struct scatterlist *src, - struct scatterlist *dst, - unsigned int cryptlen, u8 *iv) -{ - req->src = src; - req->dst = dst; - req->cryptlen = cryptlen; - req->iv = iv; -} - -/** - * aead_request_set_assoc() - set the associated data scatter / gather list - * @req: request handle - * @assoc: associated data scatter / gather list - * @assoclen: number of bytes to process from @assoc - * - * For encryption, the memory is filled with the associated data. For - * decryption, the memory must point to the associated data. 
- */ -static inline void aead_request_set_assoc(struct aead_request *req, - struct scatterlist *assoc, - unsigned int assoclen) -{ - req->assoc = assoc; - req->assoclen = assoclen; -} - -/** * DOC: Synchronous Block Cipher API * * The synchronous block cipher API is used with the ciphers of type diff --git a/include/linux/module.h b/include/linux/module.h index c883b86..1e54360 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -655,4 +655,16 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr, static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ +#ifdef CONFIG_MODULE_SIG +static inline bool module_sig_ok(struct module *module) +{ + return module->sig_ok; +} +#else /* !CONFIG_MODULE_SIG */ +static inline bool module_sig_ok(struct module *module) +{ + return true; +} +#endif /* CONFIG_MODULE_SIG */ + #endif /* _LINUX_MODULE_H */ diff --git a/include/linux/nx842.h b/include/linux/nx842.h index a4d324c..4ddf68d 100644 --- a/include/linux/nx842.h +++ b/include/linux/nx842.h @@ -1,11 +1,24 @@ #ifndef __NX842_H__ #define __NX842_H__ -int nx842_get_workmem_size(void); -int nx842_get_workmem_size_aligned(void); +#define __NX842_PSERIES_MEM_COMPRESS (10240) +#define __NX842_POWERNV_MEM_COMPRESS (1024) + +#define NX842_MEM_COMPRESS (max_t(unsigned int, \ + __NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS)) + +struct nx842_constraints { + int alignment; + int multiple; + int minimum; + int maximum; +}; + +int nx842_constraints(struct nx842_constraints *constraints); + int nx842_compress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); + unsigned char *out, unsigned int *out_len, void *wrkmem); int nx842_decompress(const unsigned char *in, unsigned int in_len, - unsigned char *out, unsigned int *out_len, void *wrkmem); + unsigned char *out, unsigned int *out_len, void *wrkmem); #endif diff --git a/include/linux/random.h b/include/linux/random.h 
index b05856e..796267d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -14,6 +14,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code, extern void add_interrupt_randomness(int irq, int irq_flags); extern void get_random_bytes(void *buf, int nbytes); +extern void get_blocking_random_bytes(void *buf, int nbytes); extern void get_random_bytes_arch(void *buf, int nbytes); void generate_random_uuid(unsigned char uuid_out[16]); extern int random_int_secret_init(void); diff --git a/include/linux/sw842.h b/include/linux/sw842.h new file mode 100644 index 0000000..109ba04 --- /dev/null +++ b/include/linux/sw842.h @@ -0,0 +1,12 @@ +#ifndef __SW842_H__ +#define __SW842_H__ + +#define SW842_MEM_COMPRESS (0xf000) + +int sw842_compress(const u8 *src, unsigned int srclen, + u8 *dst, unsigned int *destlen, void *wmem); + +int sw842_decompress(const u8 *src, unsigned int srclen, + u8 *dst, unsigned int *destlen); + +#endif diff --git a/lib/842/842.h b/lib/842/842.h new file mode 100644 index 0000000..7c20003 --- /dev/null +++ b/lib/842/842.h @@ -0,0 +1,127 @@ + +#ifndef __842_H__ +#define __842_H__ + +/* The 842 compressed format is made up of multiple blocks, each of + * which have the format: + * + * <template>[arg1][arg2][arg3][arg4] + * + * where there are between 0 and 4 template args, depending on the specific + * template operation. For normal operations, each arg is either a specific + * number of data bytes to add to the output buffer, or an index pointing + * to a previously-written number of data bytes to copy to the output buffer. + * + * The template code is a 5-bit value. This code indicates what to do with + * the following data. Template codes from 0 to 0x19 should use the template + * table, the static "decomp_ops" table used in decompress. For each template + * (table row), there are between 1 and 4 actions; each action corresponds to + * an arg following the template code bits. 
Each action is either a "data" + * type action, or an "index" type action, and each action results in 2, 4, or 8 + * bytes being written to the output buffer. Each template (i.e. all actions + * in the table row) will add up to 8 bytes being written to the output buffer. + * Any row with fewer than 4 actions is padded with noop actions, indicated by + * N0 (for which there is no corresponding arg in the compressed data buffer). + * + * "Data" actions, indicated in the table by D2, D4, and D8, mean that the + * corresponding arg, which is 2, 4, or 8 bytes, respectively, in the compressed + * data buffer, should be copied directly to the output buffer. + * + * "Index" actions, indicated in the table by I2, I4, and I8, mean the + * corresponding arg is an index parameter that points to, respectively, a 2, + * 4, or 8 byte value already in the output buffer, that should be copied to + * the end of the output buffer. Essentially, the index points to a position + * in a ring buffer that contains the last N bytes of output buffer data. + * The number of bits for each index's arg are: 8 bits for I2, 9 bits for I4, + * and 8 bits for I8. Since each index points to a 2, 4, or 8 byte section, + * this means that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4 + * can reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference 2048 + * bytes ((2^8 = 256) * 8 bytes). Think of it as a kind-of ring buffer for + * each of I2, I4, and I8 that are updated for each byte written to the output + * buffer. In this implementation, the output buffer is directly used for each + * index; there is no additional memory required. Note that the index is into + * a ring buffer, not a sliding window; for example, if there have been 520 + * bytes written to the output buffer, an I2 index of 0 would index to byte 512 + * in the output buffer, while an I2 index of 16 would index to byte 32 in the + * output buffer. 
+ * + * There are also 3 special template codes; 0x1b for "repeat", 0x1c for + * "zeros", and 0x1e for "end". The "repeat" operation is followed by a 6 bit + * arg N indicating how many times to repeat. The last 8 bytes written to the + * output buffer are written again to the output buffer, N + 1 times. The + * "zeros" operation, which has no arg bits, writes 8 zeros to the output + * buffer. The "end" operation, which also has no arg bits, signals the end + * of the compressed data. There may be some number of padding (don't care, + * but usually 0) bits after the "end" operation bits, to fill the buffer + * length to a specific byte multiple (usually a multiple of 8, 16, or 32 + * bytes). + * + * This software implementation also uses one of the undefined template values, + * 0x1d as a special "short data" template code, to represent less than 8 bytes + * of uncompressed data. It is followed by a 3 bit arg N indicating how many + * data bytes will follow, and then N bytes of data, which should be copied to + * the output buffer. This allows the software 842 compressor to accept input + * buffers that are not an exact multiple of 8 bytes long. However, those + * compressed buffers containing this sw-only template will be rejected by + * the 842 hardware decompressor, and must be decompressed with this software + * library. The 842 software compression module includes a parameter to + * disable using this sw-only "short data" template, and instead simply + * reject any input buffer that is not a multiple of 8 bytes long. + * + * After all actions for each operation code are processed, another template + * code is in the next 5 bits. The decompression ends once the "end" template + * code is detected. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <asm/unaligned.h> + +#include <linux/sw842.h> + +/* special templates */ +#define OP_REPEAT (0x1B) +#define OP_ZEROS (0x1C) +#define OP_END (0x1E) + +/* sw only template - this is not in the hw design; it's used only by this + * software compressor and decompressor, to allow input buffers that aren't + * a multiple of 8. + */ +#define OP_SHORT_DATA (0x1D) + +/* additional bits of each op param */ +#define OP_BITS (5) +#define REPEAT_BITS (6) +#define SHORT_DATA_BITS (3) +#define I2_BITS (8) +#define I4_BITS (9) +#define I8_BITS (8) + +#define REPEAT_BITS_MAX (0x3f) +#define SHORT_DATA_BITS_MAX (0x7) + +/* Arbitrary values used to indicate action */ +#define OP_ACTION (0x70) +#define OP_ACTION_INDEX (0x10) +#define OP_ACTION_DATA (0x20) +#define OP_ACTION_NOOP (0x40) +#define OP_AMOUNT (0x0f) +#define OP_AMOUNT_0 (0x00) +#define OP_AMOUNT_2 (0x02) +#define OP_AMOUNT_4 (0x04) +#define OP_AMOUNT_8 (0x08) + +#define D2 (OP_ACTION_DATA | OP_AMOUNT_2) +#define D4 (OP_ACTION_DATA | OP_AMOUNT_4) +#define D8 (OP_ACTION_DATA | OP_AMOUNT_8) +#define I2 (OP_ACTION_INDEX | OP_AMOUNT_2) +#define I4 (OP_ACTION_INDEX | OP_AMOUNT_4) +#define I8 (OP_ACTION_INDEX | OP_AMOUNT_8) +#define N0 (OP_ACTION_NOOP | OP_AMOUNT_0) + +/* the max of the regular templates - not including the special templates */ +#define OPS_MAX (0x1a) + +#endif diff --git a/lib/842/842_compress.c b/lib/842/842_compress.c new file mode 100644 index 0000000..7ce6894 --- /dev/null +++ b/lib/842/842_compress.c @@ -0,0 +1,626 @@ +/* + * 842 Software Compression + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * See 842.h for details of the 842 compressed format. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define MODULE_NAME "842_compress" + +#include <linux/hashtable.h> + +#include "842.h" +#include "842_debugfs.h" + +#define SW842_HASHTABLE8_BITS (10) +#define SW842_HASHTABLE4_BITS (11) +#define SW842_HASHTABLE2_BITS (10) + +/* By default, we allow compressing input buffers of any length, but we must + * use the non-standard "short data" template so the decompressor can correctly + * reproduce the uncompressed data buffer at the right length. However, the + * hardware 842 decompressor will not recognize the "short data" template, and + * will fail to decompress any compressed buffer containing it (I have no idea + * why anyone would want to use software to compress and hardware to decompress + * but that's beside the point). This parameter forces the compression + * function to simply reject any input buffer that isn't a multiple of 8 bytes + * long, instead of using the "short data" template, so that all compressed + * buffers produced by this function will be decompressible by the 842 hardware + * decompressor. Unless you have a specific need for that, leave this disabled + * so that any length buffer can be compressed. 
+ */ +static bool sw842_strict; +module_param_named(strict, sw842_strict, bool, 0644); + +static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */ + { I8, N0, N0, N0, 0x19 }, /* 8 */ + { I4, I4, N0, N0, 0x18 }, /* 18 */ + { I4, I2, I2, N0, 0x17 }, /* 25 */ + { I2, I2, I4, N0, 0x13 }, /* 25 */ + { I2, I2, I2, I2, 0x12 }, /* 32 */ + { I4, I2, D2, N0, 0x16 }, /* 33 */ + { I4, D2, I2, N0, 0x15 }, /* 33 */ + { I2, D2, I4, N0, 0x0e }, /* 33 */ + { D2, I2, I4, N0, 0x09 }, /* 33 */ + { I2, I2, I2, D2, 0x11 }, /* 40 */ + { I2, I2, D2, I2, 0x10 }, /* 40 */ + { I2, D2, I2, I2, 0x0d }, /* 40 */ + { D2, I2, I2, I2, 0x08 }, /* 40 */ + { I4, D4, N0, N0, 0x14 }, /* 41 */ + { D4, I4, N0, N0, 0x04 }, /* 41 */ + { I2, I2, D4, N0, 0x0f }, /* 48 */ + { I2, D2, I2, D2, 0x0c }, /* 48 */ + { I2, D4, I2, N0, 0x0b }, /* 48 */ + { D2, I2, I2, D2, 0x07 }, /* 48 */ + { D2, I2, D2, I2, 0x06 }, /* 48 */ + { D4, I2, I2, N0, 0x03 }, /* 48 */ + { I2, D2, D4, N0, 0x0a }, /* 56 */ + { D2, I2, D4, N0, 0x05 }, /* 56 */ + { D4, I2, D2, N0, 0x02 }, /* 56 */ + { D4, D2, I2, N0, 0x01 }, /* 56 */ + { D8, N0, N0, N0, 0x00 }, /* 64 */ +}; + +struct sw842_hlist_node8 { + struct hlist_node node; + u64 data; + u8 index; +}; + +struct sw842_hlist_node4 { + struct hlist_node node; + u32 data; + u16 index; +}; + +struct sw842_hlist_node2 { + struct hlist_node node; + u16 data; + u8 index; +}; + +#define INDEX_NOT_FOUND (-1) +#define INDEX_NOT_CHECKED (-2) + +struct sw842_param { + u8 *in; + u8 *instart; + u64 ilen; + u8 *out; + u64 olen; + u8 bit; + u64 data8[1]; + u32 data4[2]; + u16 data2[4]; + int index8[1]; + int index4[2]; + int index2[4]; + DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); + DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); + DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); + struct sw842_hlist_node8 node8[1 << I8_BITS]; + struct sw842_hlist_node4 node4[1 << I4_BITS]; + struct sw842_hlist_node2 node2[1 << I2_BITS]; +}; + +#define get_input_data(p, o, b) \ + 
be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o)))) + +#define init_hashtable_nodes(p, b) do { \ + int _i; \ + hash_init((p)->htable##b); \ + for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \ + (p)->node##b[_i].index = _i; \ + (p)->node##b[_i].data = 0; \ + INIT_HLIST_NODE(&(p)->node##b[_i].node); \ + } \ +} while (0) + +#define find_index(p, b, n) ({ \ + struct sw842_hlist_node##b *_n; \ + p->index##b[n] = INDEX_NOT_FOUND; \ + hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \ + if (p->data##b[n] == _n->data) { \ + p->index##b[n] = _n->index; \ + break; \ + } \ + } \ + p->index##b[n] >= 0; \ +}) + +#define check_index(p, b, n) \ + ((p)->index##b[n] == INDEX_NOT_CHECKED \ + ? find_index(p, b, n) \ + : (p)->index##b[n] >= 0) + +#define replace_hash(p, b, i, d) do { \ + struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \ + hash_del(&_n->node); \ + _n->data = (p)->data##b[d]; \ + pr_debug("add hash index%x %x pos %x data %lx\n", b, \ + (unsigned int)_n->index, \ + (unsigned int)((p)->in - (p)->instart), \ + (unsigned long)_n->data); \ + hash_add((p)->htable##b, &_n->node, _n->data); \ +} while (0) + +static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; + +static int add_bits(struct sw842_param *p, u64 d, u8 n); + +static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) +{ + int ret; + + if (n <= s) + return -EINVAL; + + ret = add_bits(p, d >> s, n - s); + if (ret) + return ret; + return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); +} + +static int add_bits(struct sw842_param *p, u64 d, u8 n) +{ + int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; + u64 o; + u8 *out = p->out; + + pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); + + if (n > 64) + return -EINVAL; + + /* split this up if writing to > 8 bytes (i.e. 
n == 64 && p->bit > 0), + * or if we're at the end of the output buffer and would write past end + */ + if (bits > 64) + return __split_add_bits(p, d, n, 32); + else if (p->olen < 8 && bits > 32 && bits <= 56) + return __split_add_bits(p, d, n, 16); + else if (p->olen < 4 && bits > 16 && bits <= 24) + return __split_add_bits(p, d, n, 8); + + if (DIV_ROUND_UP(bits, 8) > p->olen) + return -ENOSPC; + + o = *out & bmask[b]; + d <<= s; + + if (bits <= 8) + *out = o | d; + else if (bits <= 16) + put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); + else if (bits <= 24) + put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); + else if (bits <= 32) + put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); + else if (bits <= 40) + put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); + else if (bits <= 48) + put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); + else if (bits <= 56) + put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); + else + put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); + + p->bit += n; + + if (p->bit > 7) { + p->out += p->bit / 8; + p->olen -= p->bit / 8; + p->bit %= 8; + } + + return 0; +} + +static int add_template(struct sw842_param *p, u8 c) +{ + int ret, i, b = 0; + u8 *t = comp_ops[c]; + bool inv = false; + + if (c >= OPS_MAX) + return -EINVAL; + + pr_debug("template %x\n", t[4]); + + ret = add_bits(p, t[4], OP_BITS); + if (ret) + return ret; + + for (i = 0; i < 4; i++) { + pr_debug("op %x\n", t[i]); + + switch (t[i] & OP_AMOUNT) { + case OP_AMOUNT_8: + if (b) + inv = true; + else if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index8[0], I8_BITS); + else if (t[i] & OP_ACTION_DATA) + ret = add_bits(p, p->data8[0], 64); + else + inv = true; + break; + case OP_AMOUNT_4: + if (b == 2 && t[i] & OP_ACTION_DATA) + ret = add_bits(p, get_input_data(p, 2, 32), 32); + else if (b != 0 && b != 4) + inv = true; + else if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index4[b >> 2], I4_BITS); + else if (t[i] & 
OP_ACTION_DATA) + ret = add_bits(p, p->data4[b >> 2], 32); + else + inv = true; + break; + case OP_AMOUNT_2: + if (b != 0 && b != 2 && b != 4 && b != 6) + inv = true; + if (t[i] & OP_ACTION_INDEX) + ret = add_bits(p, p->index2[b >> 1], I2_BITS); + else if (t[i] & OP_ACTION_DATA) + ret = add_bits(p, p->data2[b >> 1], 16); + else + inv = true; + break; + case OP_AMOUNT_0: + inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); + break; + default: + inv = true; + break; + } + + if (ret) + return ret; + + if (inv) { + pr_err("Invalid templ %x op %d : %x %x %x %x\n", + c, i, t[0], t[1], t[2], t[3]); + return -EINVAL; + } + + b += t[i] & OP_AMOUNT; + } + + if (b != 8) { + pr_err("Invalid template %x len %x : %x %x %x %x\n", + c, b, t[0], t[1], t[2], t[3]); + return -EINVAL; + } + + if (sw842_template_counts) + atomic_inc(&template_count[t[4]]); + + return 0; +} + +static int add_repeat_template(struct sw842_param *p, u8 r) +{ + int ret; + + /* repeat param is 0-based */ + if (!r || --r > REPEAT_BITS_MAX) + return -EINVAL; + + ret = add_bits(p, OP_REPEAT, OP_BITS); + if (ret) + return ret; + + ret = add_bits(p, r, REPEAT_BITS); + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_repeat_count); + + return 0; +} + +static int add_short_data_template(struct sw842_param *p, u8 b) +{ + int ret, i; + + if (!b || b > SHORT_DATA_BITS_MAX) + return -EINVAL; + + ret = add_bits(p, OP_SHORT_DATA, OP_BITS); + if (ret) + return ret; + + ret = add_bits(p, b, SHORT_DATA_BITS); + if (ret) + return ret; + + for (i = 0; i < b; i++) { + ret = add_bits(p, p->in[i], 8); + if (ret) + return ret; + } + + if (sw842_template_counts) + atomic_inc(&template_short_data_count); + + return 0; +} + +static int add_zeros_template(struct sw842_param *p) +{ + int ret = add_bits(p, OP_ZEROS, OP_BITS); + + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_zeros_count); + + return 0; +} + +static int add_end_template(struct sw842_param *p) +{ + int ret = add_bits(p, 
OP_END, OP_BITS); + + if (ret) + return ret; + + if (sw842_template_counts) + atomic_inc(&template_end_count); + + return 0; +} + +static bool check_template(struct sw842_param *p, u8 c) +{ + u8 *t = comp_ops[c]; + int i, match, b = 0; + + if (c >= OPS_MAX) + return false; + + for (i = 0; i < 4; i++) { + if (t[i] & OP_ACTION_INDEX) { + if (t[i] & OP_AMOUNT_2) + match = check_index(p, 2, b >> 1); + else if (t[i] & OP_AMOUNT_4) + match = check_index(p, 4, b >> 2); + else if (t[i] & OP_AMOUNT_8) + match = check_index(p, 8, 0); + else + return false; + if (!match) + return false; + } + + b += t[i] & OP_AMOUNT; + } + + return true; +} + +static void get_next_data(struct sw842_param *p) +{ + p->data8[0] = get_input_data(p, 0, 64); + p->data4[0] = get_input_data(p, 0, 32); + p->data4[1] = get_input_data(p, 4, 32); + p->data2[0] = get_input_data(p, 0, 16); + p->data2[1] = get_input_data(p, 2, 16); + p->data2[2] = get_input_data(p, 4, 16); + p->data2[3] = get_input_data(p, 6, 16); +} + +/* update the hashtable entries. 
+ * only call this after finding/adding the current template + * the dataN fields for the current 8 byte block must be already updated + */ +static void update_hashtables(struct sw842_param *p) +{ + u64 pos = p->in - p->instart; + u64 n8 = (pos >> 3) % (1 << I8_BITS); + u64 n4 = (pos >> 2) % (1 << I4_BITS); + u64 n2 = (pos >> 1) % (1 << I2_BITS); + + replace_hash(p, 8, n8, 0); + replace_hash(p, 4, n4, 0); + replace_hash(p, 4, n4, 1); + replace_hash(p, 2, n2, 0); + replace_hash(p, 2, n2, 1); + replace_hash(p, 2, n2, 2); + replace_hash(p, 2, n2, 3); +} + +/* find the next template to use, and add it + * the p->dataN fields must already be set for the current 8 byte block + */ +static int process_next(struct sw842_param *p) +{ + int ret, i; + + p->index8[0] = INDEX_NOT_CHECKED; + p->index4[0] = INDEX_NOT_CHECKED; + p->index4[1] = INDEX_NOT_CHECKED; + p->index2[0] = INDEX_NOT_CHECKED; + p->index2[1] = INDEX_NOT_CHECKED; + p->index2[2] = INDEX_NOT_CHECKED; + p->index2[3] = INDEX_NOT_CHECKED; + + /* check up to OPS_MAX - 1; last op is our fallback */ + for (i = 0; i < OPS_MAX - 1; i++) { + if (check_template(p, i)) + break; + } + + ret = add_template(p, i); + if (ret) + return ret; + + return 0; +} + +/** + * sw842_compress + * + * Compress the uncompressed buffer of length @ilen at @in to the output buffer + * @out, using no more than @olen bytes, using the 842 compression format. + * + * Returns: 0 on success, error on failure. The @olen parameter + * will contain the number of output bytes written on success, or + * 0 on error. 
+ */ +int sw842_compress(const u8 *in, unsigned int ilen, + u8 *out, unsigned int *olen, void *wmem) +{ + struct sw842_param *p = (struct sw842_param *)wmem; + int ret; + u64 last, next, pad, total; + u8 repeat_count = 0; + + BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); + + init_hashtable_nodes(p, 8); + init_hashtable_nodes(p, 4); + init_hashtable_nodes(p, 2); + + p->in = (u8 *)in; + p->instart = p->in; + p->ilen = ilen; + p->out = out; + p->olen = *olen; + p->bit = 0; + + total = p->olen; + + *olen = 0; + + /* if using strict mode, we can only compress a multiple of 8 */ + if (sw842_strict && (ilen % 8)) { + pr_err("Using strict mode, can't compress len %d\n", ilen); + return -EINVAL; + } + + /* let's compress at least 8 bytes, mkay? */ + if (unlikely(ilen < 8)) + goto skip_comp; + + /* make initial 'last' different so we don't match the first time */ + last = ~get_unaligned((u64 *)p->in); + + while (p->ilen > 7) { + next = get_unaligned((u64 *)p->in); + + /* must get the next data, as we need to update the hashtable + * entries with the new data every time + */ + get_next_data(p); + + /* we don't care about endianness in last or next; + * we're just comparing 8 bytes to another 8 bytes, + * they're both the same endianness + */ + if (next == last) { + /* repeat count bits are 0-based, so we stop at +1 */ + if (++repeat_count <= REPEAT_BITS_MAX) + goto repeat; + } + if (repeat_count) { + ret = add_repeat_template(p, repeat_count); + repeat_count = 0; + if (next == last) /* reached max repeat bits */ + goto repeat; + } + + if (next == 0) + ret = add_zeros_template(p); + else + ret = process_next(p); + + if (ret) + return ret; + +repeat: + last = next; + update_hashtables(p); + p->in += 8; + p->ilen -= 8; + } + + if (repeat_count) { + ret = add_repeat_template(p, repeat_count); + if (ret) + return ret; + } + +skip_comp: + if (p->ilen > 0) { + ret = add_short_data_template(p, p->ilen); + if (ret) + return ret; + + p->in += p->ilen; + p->ilen = 0; + } + + ret = 
add_end_template(p); + if (ret) + return ret; + + if (p->bit) { + p->out++; + p->olen--; + p->bit = 0; + } + + /* pad compressed length to multiple of 8 */ + pad = (8 - ((total - p->olen) % 8)) % 8; + if (pad) { + if (pad > p->olen) /* we were so close! */ + return -ENOSPC; + memset(p->out, 0, pad); + p->out += pad; + p->olen -= pad; + } + + if (unlikely((total - p->olen) > UINT_MAX)) + return -ENOSPC; + + *olen = total - p->olen; + + return 0; +} +EXPORT_SYMBOL_GPL(sw842_compress); + +static int __init sw842_init(void) +{ + if (sw842_template_counts) + sw842_debugfs_create(); + + return 0; +} +module_init(sw842_init); + +static void __exit sw842_exit(void) +{ + if (sw842_template_counts) + sw842_debugfs_remove(); +} +module_exit(sw842_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Software 842 Compressor"); +MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); diff --git a/lib/842/842_debugfs.h b/lib/842/842_debugfs.h new file mode 100644 index 0000000..e7f3bff --- /dev/null +++ b/lib/842/842_debugfs.h @@ -0,0 +1,52 @@ + +#ifndef __842_DEBUGFS_H__ +#define __842_DEBUGFS_H__ + +#include <linux/debugfs.h> + +static bool sw842_template_counts; +module_param_named(template_counts, sw842_template_counts, bool, 0444); + +static atomic_t template_count[OPS_MAX], template_repeat_count, + template_zeros_count, template_short_data_count, template_end_count; + +static struct dentry *sw842_debugfs_root; + +static int __init sw842_debugfs_create(void) +{ + umode_t m = S_IRUGO | S_IWUSR; + int i; + + if (!debugfs_initialized()) + return -ENODEV; + + sw842_debugfs_root = debugfs_create_dir(MODULE_NAME, NULL); + if (IS_ERR(sw842_debugfs_root)) + return PTR_ERR(sw842_debugfs_root); + + for (i = 0; i < ARRAY_SIZE(template_count); i++) { + char name[32]; + + snprintf(name, 32, "template_%02x", i); + debugfs_create_atomic_t(name, m, sw842_debugfs_root, + &template_count[i]); + } + debugfs_create_atomic_t("template_repeat", m, sw842_debugfs_root, + &template_repeat_count); + 
debugfs_create_atomic_t("template_zeros", m, sw842_debugfs_root, + &template_zeros_count); + debugfs_create_atomic_t("template_short_data", m, sw842_debugfs_root, + &template_short_data_count); + debugfs_create_atomic_t("template_end", m, sw842_debugfs_root, + &template_end_count); + + return 0; +} + +static void __exit sw842_debugfs_remove(void) +{ + if (sw842_debugfs_root && !IS_ERR(sw842_debugfs_root)) + debugfs_remove_recursive(sw842_debugfs_root); +} + +#endif diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c new file mode 100644 index 0000000..5446ff0 --- /dev/null +++ b/lib/842/842_decompress.c @@ -0,0 +1,405 @@ +/* + * 842 Software Decompression + * + * Copyright (C) 2015 Dan Streetman, IBM Corp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * See 842.h for details of the 842 compressed format. 
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define MODULE_NAME "842_decompress"
+
+#include "842.h"
+#include "842_debugfs.h"
+
+/* rolling fifo sizes */
+#define I2_FIFO_SIZE	(2 * (1 << I2_BITS))
+#define I4_FIFO_SIZE	(4 * (1 << I4_BITS))
+#define I8_FIFO_SIZE	(8 * (1 << I8_BITS))
+
+/*
+ * Template table, indexed by the template number read from the input.
+ * Each row lists up to four actions that do_op() executes in order;
+ * the table is only ever read, hence const.  The D<n>/I<n>/N0 action
+ * codes come from 842.h (presumably: n bytes of literal data, an n-byte
+ * index reference, and no-op respectively -- confirm against 842.h).
+ */
+static const u8 decomp_ops[OPS_MAX][4] = {
+	{ D8, N0, N0, N0 },
+	{ D4, D2, I2, N0 },
+	{ D4, I2, D2, N0 },
+	{ D4, I2, I2, N0 },
+	{ D4, I4, N0, N0 },
+	{ D2, I2, D4, N0 },
+	{ D2, I2, D2, I2 },
+	{ D2, I2, I2, D2 },
+	{ D2, I2, I2, I2 },
+	{ D2, I2, I4, N0 },
+	{ I2, D2, D4, N0 },
+	{ I2, D4, I2, N0 },
+	{ I2, D2, I2, D2 },
+	{ I2, D2, I2, I2 },
+	{ I2, D2, I4, N0 },
+	{ I2, I2, D4, N0 },
+	{ I2, I2, D2, I2 },
+	{ I2, I2, I2, D2 },
+	{ I2, I2, I2, I2 },
+	{ I2, I2, I4, N0 },
+	{ I4, D4, N0, N0 },
+	{ I4, D2, I2, N0 },
+	{ I4, I2, D2, N0 },
+	{ I4, I2, I2, N0 },
+	{ I4, I4, N0, N0 },
+	{ I8, N0, N0, N0 }
+};
+
+/* decompression cursor shared by all helpers in this file */
+struct sw842_param {
+	u8 *in;		/* next input byte to read from */
+	u8 bit;		/* bits of *in already consumed (kept in 0..7) */
+	u64 ilen;	/* input bytes remaining, counted from in */
+	u8 *out;	/* next output byte to write */
+	u8 *ostart;	/* start of the output buffer; base for index lookups */
+	u64 olen;	/* output bytes remaining, counted from out */
+};
+
+#define beN_to_cpu(d, s) \ + ((s) == 2 ? be16_to_cpu(get_unaligned((__be16 *)d)) : \ + (s) == 4 ? be32_to_cpu(get_unaligned((__be32 *)d)) : \ + (s) == 8 ?
be64_to_cpu(get_unaligned((__be64 *)d)) : \ + WARN(1, "pr_debug param err invalid size %x\n", s))
+
+static int next_bits(struct sw842_param *p, u64 *d, u8 n);
+
+/*
+ * Read @n bits as two smaller reads so that no single read touches more
+ * input bytes than are available: first the upper (@n - @s) bits, then
+ * the lower @s bits, combined into @d.
+ *
+ * Returns 0 on success, -EINVAL if @n is not larger than @s, or the
+ * error from next_bits().
+ */
+static int __split_next_bits(struct sw842_param *p, u64 *d, u8 n, u8 s)
+{
+	u64 tmp = 0;
+	int ret;
+
+	if (n <= s) {
+		pr_debug("split_next_bits invalid n %u s %u\n", n, s);
+		return -EINVAL;
+	}
+
+	ret = next_bits(p, &tmp, n - s);
+	if (ret)
+		return ret;
+	ret = next_bits(p, d, s);
+	if (ret)
+		return ret;
+	*d |= tmp << s;
+	return 0;
+}
+
+/*
+ * Consume the next @n bits (1..64) of the big-endian input bit stream
+ * and store them right-aligned in @d, then advance @p->in, @p->ilen and
+ * @p->bit past them.
+ *
+ * Returns 0 on success, -EINVAL if @n is 0 or greater than 64, or
+ * -EOVERFLOW if fewer input bytes remain than the read needs.
+ */
+static int next_bits(struct sw842_param *p, u64 *d, u8 n)
+{
+	u8 *in = p->in, b = p->bit, bits = b + n;
+
+	/* n == 0 would make GENMASK_ULL(n - 1, 0) below an undefined shift */
+	if (!n || n > 64) {
+		pr_debug("next_bits invalid n %u\n", n);
+		return -EINVAL;
+	}
+
+	/* split this up if reading > 8 bytes, or if we're at the end of
+	 * the input buffer and would read past the end
+	 */
+	if (bits > 64)
+		return __split_next_bits(p, d, n, 32);
+	else if (p->ilen < 8 && bits > 32 && bits <= 56)
+		return __split_next_bits(p, d, n, 16);
+	else if (p->ilen < 4 && bits > 16 && bits <= 24)
+		return __split_next_bits(p, d, n, 8);
+
+	if (DIV_ROUND_UP(bits, 8) > p->ilen)
+		return -EOVERFLOW;
+
+	/* load the smallest word covering the bits, drop the trailing ones */
+	if (bits <= 8)
+		*d = *in >> (8 - bits);
+	else if (bits <= 16)
+		*d = be16_to_cpu(get_unaligned((__be16 *)in)) >> (16 - bits);
+	else if (bits <= 32)
+		*d = be32_to_cpu(get_unaligned((__be32 *)in)) >> (32 - bits);
+	else
+		*d = be64_to_cpu(get_unaligned((__be64 *)in)) >> (64 - bits);
+
+	/* mask off the bits that were already consumed from the first byte */
+	*d &= GENMASK_ULL(n - 1, 0);
+
+	p->bit += n;
+
+	if (p->bit > 7) {
+		p->in += p->bit / 8;
+		p->ilen -= p->bit / 8;
+		p->bit %= 8;
+	}
+
+	return 0;
+}
+
+static int do_data(struct sw842_param *p, u8 n) +{ + u64 v; + int ret; + + if (n > p->olen) + return -ENOSPC; + + ret = next_bits(p, &v, n * 8); + if (ret) + return ret; + + switch (n) { + case 2: + put_unaligned(cpu_to_be16((u16)v), (__be16 *)p->out); + break; + case 4: + put_unaligned(cpu_to_be32((u32)v), (__be32 *)p->out); + break; + case 8: + put_unaligned(cpu_to_be64((u64)v), (__be64 *)p->out); + break; + default: +
return -EINVAL; + } + + p->out += n; + p->olen -= n; + + return 0; +}
+
+/*
+ * Copy @size bytes of previously written output to the current output
+ * position.  The source is selected by a @bits-wide index read from the
+ * input; the index counts @size-byte slots inside a ring buffer ("fifo")
+ * of @fsize bytes that trails the 8-byte-aligned amount of output
+ * produced so far.
+ *
+ * Returns 0 on success, -ENOSPC if fewer than @size output bytes remain,
+ * -EINVAL if the index points past the data written so far, or the
+ * error from next_bits().
+ */
+static int __do_index(struct sw842_param *p, u8 size, u8 bits, u64 fsize)
+{
+	u64 index, offset, total = round_down(p->out - p->ostart, 8);
+	int ret;
+
+	/* never write past the caller's buffer; do_data() checks the same */
+	if (size > p->olen)
+		return -ENOSPC;
+
+	ret = next_bits(p, &index, bits);
+	if (ret)
+		return ret;
+
+	offset = index * size;
+
+	/* a ring buffer of fsize is used; correct the offset */
+	if (total > fsize) {
+		/* this is where the current fifo is */
+		u64 section = round_down(total, fsize);
+		/* the current pos in the fifo */
+		u64 pos = total - section;
+
+		/* if the offset is past/at the pos, we need to
+		 * go back to the last fifo section
+		 */
+		if (offset >= pos)
+			section -= fsize;
+
+		offset += section;
+	}
+
+	if (offset + size > total) {
+		pr_debug("index%x %lx points past end %lx\n", size,
+			 (unsigned long)offset, (unsigned long)total);
+		return -EINVAL;
+	}
+
+	pr_debug("index%x to %lx off %lx adjoff %lx tot %lx data %lx\n",
+		 size, (unsigned long)index, (unsigned long)(index * size),
+		 (unsigned long)offset, (unsigned long)total,
+		 (unsigned long)beN_to_cpu(&p->ostart[offset], size));
+
+	memcpy(p->out, &p->ostart[offset], size);
+	p->out += size;
+	p->olen -= size;
+
+	return 0;
+}
+
+/*
+ * Run an index action of @n bytes (2, 4 or 8) with the index width and
+ * fifo size that belong to that amount.  Returns -EINVAL for any other
+ * @n, otherwise the result of __do_index().
+ */
+static int do_index(struct sw842_param *p, u8 n)
+{
+	switch (n) {
+	case 2:
+		return __do_index(p, 2, I2_BITS, I2_FIFO_SIZE);
+	case 4:
+		return __do_index(p, 4, I4_BITS, I4_FIFO_SIZE);
+	case 8:
+		return __do_index(p, 8, I8_BITS, I8_FIFO_SIZE);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int do_op(struct sw842_param *p, u8 o) +{ + int i, ret = 0; + + if (o >= OPS_MAX) + return -EINVAL; + + for (i = 0; i < 4; i++) { + u8 op = decomp_ops[o][i]; + + pr_debug("op is %x\n", op); + + switch (op & OP_ACTION) { + case OP_ACTION_DATA: + ret = do_data(p, op & OP_AMOUNT); + break; + case OP_ACTION_INDEX: + ret = do_index(p, op & OP_AMOUNT); + break; + case OP_ACTION_NOOP: + break; + default: + pr_err("Interal error, invalid op %x\n", op); + return -EINVAL; + } + + if (ret) + return ret; + } +
+ + if (sw842_template_counts) + atomic_inc(&template_count[o]); + + return 0; +}
+
+/**
+ * sw842_decompress
+ *
+ * Decompress the 842-compressed buffer of length @ilen at @in
+ * to the output buffer @out, using no more than @olen bytes.
+ *
+ * The compressed buffer must be only a single 842-compressed buffer,
+ * with the standard format described in the comments in 842.h
+ * Processing will stop when the 842 "END" template is detected,
+ * not the end of the buffer.
+ *
+ * Returns: 0 on success, error on failure.  -ENOSPC means the output
+ * did not fit in @olen bytes, -EINVAL means the input is malformed;
+ * errors from reading the input are passed through.  The @olen
+ * parameter will contain the number of output bytes written on
+ * success, or 0 on error.
+ */
+int sw842_decompress(const u8 *in, unsigned int ilen,
+		     u8 *out, unsigned int *olen)
+{
+	struct sw842_param p;
+	int ret;
+	u64 op, rep, tmp, bytes, total;
+
+	p.in = (u8 *)in;
+	p.bit = 0;
+	p.ilen = ilen;
+	p.out = out;
+	p.ostart = out;
+	p.olen = *olen;
+
+	total = p.olen;
+
+	/* report 0 bytes on every error path below */
+	*olen = 0;
+
+	do {
+		ret = next_bits(&p, &op, OP_BITS);
+		if (ret)
+			return ret;
+
+		pr_debug("template is %lx\n", (unsigned long)op);
+
+		switch (op) {
+		case OP_REPEAT:
+			ret = next_bits(&p, &rep, REPEAT_BITS);
+			if (ret)
+				return ret;
+
+			/*
+			 * The copy below reads the 8 bytes before p.out, so
+			 * at least 8 bytes must already have been written.
+			 */
+			if (p.out - out < 8)
+				return -EINVAL;
+
+			/* copy rep + 1 */
+			rep++;
+
+			if (rep * 8 > p.olen)
+				return -ENOSPC;
+
+			while (rep-- > 0) {
+				memcpy(p.out, p.out - 8, 8);
+				p.out += 8;
+				p.olen -= 8;
+			}
+
+			if (sw842_template_counts)
+				atomic_inc(&template_repeat_count);
+
+			break;
+		case OP_ZEROS:
+			if (8 > p.olen)
+				return -ENOSPC;
+
+			memset(p.out, 0, 8);
+			p.out += 8;
+			p.olen -= 8;
+
+			if (sw842_template_counts)
+				atomic_inc(&template_zeros_count);
+
+			break;
+		case OP_SHORT_DATA:
+			ret = next_bits(&p, &bytes, SHORT_DATA_BITS);
+			if (ret)
+				return ret;
+
+			if (!bytes || bytes > SHORT_DATA_BITS_MAX)
+				return -EINVAL;
+
+			/* the byte loop below must stay inside the buffer */
+			if (bytes > p.olen)
+				return -ENOSPC;
+
+			while (bytes-- > 0) {
+				ret = next_bits(&p, &tmp, 8);
+				if (ret)
+					return ret;
+				*p.out = (u8)tmp;
+				p.out++;
+				p.olen--;
+			}
+
+			if (sw842_template_counts)
+				atomic_inc(&template_short_data_count);
+
+			break;
+		case OP_END:
+			if (sw842_template_counts)
+				atomic_inc(&template_end_count);
+
+			break;
+		default: /* use template */
+			ret = do_op(&p, op);
+			if (ret)
+				return ret;
+			break;
+		}
+	} while (op != OP_END);
+
+	if (unlikely((total - p.olen) > UINT_MAX))
+		return -ENOSPC;
+
+	*olen = total - p.olen;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sw842_decompress);
+
+/*
+ * Module init: create the debugfs counters when the template_counts
+ * parameter is set.  The result of the debugfs setup is deliberately
+ * ignored; init always returns 0.
+ */
+static int __init sw842_init(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_create();
+
+	return 0;
+}
+module_init(sw842_init);
+
+/* Module exit: remove the debugfs counters if they were requested. */
+static void __exit sw842_exit(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_remove();
+}
+module_exit(sw842_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software 842 Decompressor");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/lib/842/Makefile b/lib/842/Makefile new file mode 100644 index 0000000..5d24c0b --- /dev/null +++ b/lib/842/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_842_COMPRESS) += 842_compress.o +obj-$(CONFIG_842_DECOMPRESS) += 842_decompress.o diff --git a/lib/Kconfig b/lib/Kconfig index 87da53b..42c925e 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -213,6 +213,12 @@ config RANDOM32_SELFTEST # # compression support is select'ed if needed # +config 842_COMPRESS + tristate + +config 842_DECOMPRESS + tristate + config ZLIB_INFLATE tristate diff --git a/lib/Makefile b/lib/Makefile index 58f74d2..a1d67e4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -78,6 +78,8 @@ obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o +obj-$(CONFIG_842_COMPRESS) += 842/ +obj-$(CONFIG_842_DECOMPRESS) += 842/ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 208df7c..70d53da 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -11,9 +11,8 @@ #include <linux/kernel.h> #include
<linux/types.h> -#include <linux/crypto.h> #include <linux/err.h> -#include <crypto/aes.h> +#include <crypto/aead.h> #include <net/mac80211.h> #include "key.h" diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c index fd278bb..b91c9d7 100644 --- a/net/mac80211/aes_gcm.c +++ b/net/mac80211/aes_gcm.c @@ -8,9 +8,8 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/crypto.h> #include <linux/err.h> -#include <crypto/aes.h> +#include <crypto/aead.h> #include <net/mac80211.h> #include "key.h" diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index f1321b7..c34b06c 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -9,8 +9,8 @@ #include <linux/kernel.h> #include <linux/types.h> -#include <linux/crypto.h> #include <linux/err.h> +#include <crypto/aead.h> #include <crypto/aes.h> #include <net/mac80211.h> diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index dcf7395..3ccf1e9 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -17,8 +17,9 @@ #include <linux/err.h> #include <linux/bug.h> #include <linux/completion.h> +#include <linux/crypto.h> #include <linux/ieee802154.h> -#include <crypto/algapi.h> +#include <crypto/aead.h> #include "ieee802154_i.h" #include "llsec.h" |