diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-25 15:56:15 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-25 15:56:15 -0800 |
commit | 32dc43e40a2707d0cb1ab8768d080c3e9bcfed52 (patch) | |
tree | 415f3a1935fba0db2f0410360983587bf65ee712 | |
parent | d414c104e26fd3b597f855cc29473a8b1527fb4c (diff) | |
parent | 8fd61d34226014fe7886babfca6f45a7eff89d25 (diff) | |
download | op-kernel-dev-32dc43e40a2707d0cb1ab8768d080c3e9bcfed52.zip op-kernel-dev-32dc43e40a2707d0cb1ab8768d080c3e9bcfed52.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu:
"Here is the crypto update for 3.9:
- Added accelerated implementation of crc32 using pclmulqdq.
- Added test vector for fcrypt.
- Added support for OMAP4/AM33XX cipher and hash.
- Fixed loose crypto_user input checks.
- Misc fixes"
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (43 commits)
crypto: user - ensure user supplied strings are nul-terminated
crypto: user - fix empty string test in report API
crypto: user - fix info leaks in report API
crypto: caam - Added property fsl,sec-era in SEC4.0 device tree binding.
crypto: use ERR_CAST
crypto: atmel-aes - adjust duplicate test
crypto: crc32-pclmul - Kill warning on x86-32
crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels
crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC
crypto: x86/serpent - use ENTRY/ENDPROC for assember functions and localize jump targets
crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assember functions and rename ECRYPT_* to salsa20_*
crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assember functions
crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC
crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions
crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
crypto: aesni-intel - add ENDPROC statements for assembler functions
crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets
crypto: testmgr - add test vector for fcrypt
...
51 files changed, 2146 insertions, 765 deletions
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt index 6d21c02..e402277 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt @@ -113,7 +113,7 @@ PROPERTIES EXAMPLE crypto@300000 { compatible = "fsl,sec-v4.0"; - fsl,sec-era = <0x2>; + fsl,sec-era = <2>; #address-cells = <1>; #size-cells = <1>; reg = <0x300000 0x10000>; diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0ca7c9..63947a8 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o @@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o +crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index b949ec2..2849dbc 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -36,6 +36,7 @@ .file "aes-i586-asm.S" .text +#include <linux/linkage.h> #include <asm/asm-offsets.h> #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) @@ -219,14 +220,10 @@ // AES (Rijndael) Encryption Subroutine /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_enc_blk - .extern crypto_ft_tab .extern crypto_fl_tab -.align 4 - -aes_enc_blk: +ENTRY(aes_enc_blk) push %ebp mov ctx(%esp),%ebp @@ -290,18 +287,15 @@ aes_enc_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_enc_blk) // AES (Rijndael) Decryption Subroutine /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_dec_blk - .extern crypto_it_tab .extern crypto_il_tab -.align 4 - -aes_dec_blk: +ENTRY(aes_dec_blk) push %ebp mov ctx(%esp),%ebp @@ -365,3 +359,4 @@ aes_dec_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_dec_blk) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5..9105655 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -15,6 +15,7 @@ .text +#include <linux/linkage.h> #include <asm/asm-offsets.h> #define R1 %rax @@ -49,10 +50,8 @@ #define R11 %r11 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ - .global FUNC; \ - .type FUNC,@function; \ - .align 8; \ -FUNC: movq r1,r2; \ + ENTRY(FUNC); \ + movq r1,r2; \ movq r3,r4; \ leaq KEY+48(r8),r9; \ movq r10,r11; \ @@ -71,14 +70,15 @@ FUNC: movq r1,r2; \ je B192; \ leaq 32(r9),r9; -#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ +#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ movq r1,r2; \ movq r3,r4; \ movl r5 ## E,(r9); \ movl r6 ## E,4(r9); \ movl r7 ## E,8(r9); \ movl r8 ## E,12(r9); \ - ret; + ret; \ + ENDPROC(FUNC); #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ movzbl r2 ## H,r5 ## E; \ @@ -133,7 +133,7 @@ FUNC: movq r1,r2; \ #define entry(FUNC,KEY,B128,B192) \ prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) -#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) +#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) #define encrypt_round(TAB,OFFSET) \ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ @@ -151,12 +151,12 @@ FUNC: movq r1,r2; \ /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_enc_blk,0,enc128,enc192) + entry(aes_enc_blk,0,.Le128,.Le192) encrypt_round(crypto_ft_tab,-96) encrypt_round(crypto_ft_tab,-80) -enc192: encrypt_round(crypto_ft_tab,-64) +.Le192: encrypt_round(crypto_ft_tab,-64) encrypt_round(crypto_ft_tab,-48) -enc128: encrypt_round(crypto_ft_tab,-32) +.Le128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab,-16) encrypt_round(crypto_ft_tab, 0) encrypt_round(crypto_ft_tab, 16) @@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab, 80) encrypt_round(crypto_ft_tab, 96) encrypt_final(crypto_fl_tab,112) - return + return(aes_enc_blk) /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_dec_blk,240,dec128,dec192) + entry(aes_dec_blk,240,.Ld128,.Ld192) decrypt_round(crypto_it_tab,-96) decrypt_round(crypto_it_tab,-80) -dec192: decrypt_round(crypto_it_tab,-64) +.Ld192: decrypt_round(crypto_it_tab,-64) decrypt_round(crypto_it_tab,-48) -dec128: decrypt_round(crypto_it_tab,-32) +.Ld128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab,-16) decrypt_round(crypto_it_tab, 0) decrypt_round(crypto_it_tab, 16) @@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab, 80) decrypt_round(crypto_it_tab, 96) decrypt_final(crypto_il_tab,112) - return + return(aes_dec_blk) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624..04b7977 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ - ENTRY(aesni_gcm_dec) push %r12 push %r13 @@ -1437,6 +1436,7 @@ _return_T_done_decrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_dec) /***************************************************************************** @@ -1700,10 +1700,12 @@ _return_T_done_encrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_enc) #endif +.align 4 _key_expansion_128: _key_expansion_256a: pshufd $0b11111111, %xmm1, %xmm1 @@ -1715,6 +1717,8 @@ _key_expansion_256a: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_128) +ENDPROC(_key_expansion_256a) .align 4 _key_expansion_192a: @@ -1739,6 +1743,7 @@ _key_expansion_192a: movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP ret +ENDPROC(_key_expansion_192a) .align 4 _key_expansion_192b: @@ -1758,6 +1763,7 @@ _key_expansion_192b: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_192b) .align 4 _key_expansion_256b: @@ -1770,6 +1776,7 @@ _key_expansion_256b: movaps %xmm2, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_256b) /* * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key) popl KEYP #endif ret +ENDPROC(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -1903,6 +1911,7 @@ ENTRY(aesni_enc) popl KEYP #endif ret +ENDPROC(aesni_enc) /* * _aesni_enc1: internal ABI @@ -1960,6 +1969,7 @@ _aesni_enc1: movaps 0x70(TKEYP), KEY AESENCLAST KEY STATE ret +ENDPROC(_aesni_enc1) /* * _aesni_enc4: internal ABI @@ -2068,6 +2078,7 @@ _aesni_enc4: AESENCLAST KEY STATE3 AESENCLAST KEY STATE4 ret +ENDPROC(_aesni_enc4) /* * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -2090,6 +2101,7 @@ ENTRY(aesni_dec) popl KEYP #endif ret +ENDPROC(aesni_dec) /* * _aesni_dec1: internal ABI @@ -2147,6 +2159,7 @@ _aesni_dec1: movaps 0x70(TKEYP), KEY AESDECLAST KEY STATE ret +ENDPROC(_aesni_dec1) /* * _aesni_dec4: internal ABI @@ -2255,6 +2268,7 @@ _aesni_dec4: AESDECLAST KEY STATE3 AESDECLAST KEY STATE4 ret +ENDPROC(_aesni_dec4) /* * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc) popl LEN #endif ret +ENDPROC(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec) popl LEN #endif ret +ENDPROC(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc) popl IVP #endif ret +ENDPROC(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec) popl IVP #endif ret +ENDPROC(aesni_cbc_dec) #ifdef __x86_64__ .align 16 @@ -2527,6 +2545,7 @@ _aesni_inc_init: MOVQ_R64_XMM TCTR_LOW INC MOVQ_R64_XMM CTR TCTR_LOW ret +ENDPROC(_aesni_inc_init) /* * _aesni_inc: internal ABI @@ -2555,6 +2574,7 @@ _aesni_inc: movaps CTR, IV PSHUFB_XMM BSWAP_MASK IV ret +ENDPROC(_aesni_inc) /* * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: ret +ENDPROC(aesni_ctr_enc) #endif diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 391d245..246c670 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include <linux/linkage.h> + .file "blowfish-x86_64-asm.S" .text @@ -116,11 +118,7 @@ bswapq RX0; \ xorq RX0, (RIO); -.align 8 -.global __blowfish_enc_blk -.type __blowfish_enc_blk,@function; - -__blowfish_enc_blk: +ENTRY(__blowfish_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -148,19 +146,16 @@ __blowfish_enc_blk: movq %r10, RIO; test %cl, %cl; - jnz __enc_xor; + jnz .L__enc_xor; write_block(); ret; -__enc_xor: +.L__enc_xor: xor_block(); ret; +ENDPROC(__blowfish_enc_blk) -.align 8 -.global blowfish_dec_blk -.type blowfish_dec_blk,@function; - -blowfish_dec_blk: +ENTRY(blowfish_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -189,6 +184,7 @@ blowfish_dec_blk: movq %r11, %rbp; ret; +ENDPROC(blowfish_dec_blk) /********************************************************************** 4-way blowfish, four blocks parallel @@ -300,11 +296,7 @@ blowfish_dec_blk: bswapq RX3; \ xorq RX3, 24(RIO); -.align 8 -.global __blowfish_enc_blk_4way -.type __blowfish_enc_blk_4way,@function; - -__blowfish_enc_blk_4way: +ENTRY(__blowfish_enc_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -336,7 +328,7 @@ __blowfish_enc_blk_4way: movq %r11, RIO; test %bpl, %bpl; - jnz __enc_xor4; + jnz .L__enc_xor4; write_block4(); @@ -344,18 +336,15 @@ __blowfish_enc_blk_4way: popq %rbp; ret; -__enc_xor4: +.L__enc_xor4: xor_block4(); popq %rbx; popq %rbp; ret; +ENDPROC(__blowfish_enc_blk_4way) -.align 8 -.global blowfish_dec_blk_4way -.type blowfish_dec_blk_4way,@function; - -blowfish_dec_blk_4way: +ENTRY(blowfish_dec_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -387,4 +376,4 @@ blowfish_dec_blk_4way: popq %rbp; ret; - +ENDPROC(blowfish_dec_blk_4way) diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2306d2e..cfc1634 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -15,6 +15,8 @@ * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz */ +#include <linux/linkage.h> + #define CAMELLIA_TABLE_BYTE_LEN 272 /* struct camellia_ctx: */ @@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); ret; +ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: @@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); ret; +ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* * IN/OUT: @@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: .text .align 8 -.type __camellia_enc_blk16,@function; - __camellia_enc_blk16: /* input: * %rdi: ctx, CTX @@ -793,10 +795,9 @@ __camellia_enc_blk16: %xmm15, %rax, %rcx, 24); jmp .Lenc_done; +ENDPROC(__camellia_enc_blk16) .align 8 -.type __camellia_dec_blk16,@function; - __camellia_dec_blk16: /* input: * %rdi: ctx, CTX @@ -877,12 +878,9 @@ __camellia_dec_blk16: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; +ENDPROC(__camellia_dec_blk16) -.align 8 -.global camellia_ecb_enc_16way -.type camellia_ecb_enc_16way,@function; - -camellia_ecb_enc_16way: +ENTRY(camellia_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -903,12 +901,9 @@ camellia_ecb_enc_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_enc_16way) -.align 8 -.global camellia_ecb_dec_16way -.type camellia_ecb_dec_16way,@function; - -camellia_ecb_dec_16way: +ENTRY(camellia_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -934,12 +929,9 @@ camellia_ecb_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_dec_16way) -.align 8 -.global camellia_cbc_dec_16way -.type camellia_cbc_dec_16way,@function; - -camellia_cbc_dec_16way: +ENTRY(camellia_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -986,6 +978,7 @@ camellia_cbc_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_cbc_dec_16way) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -993,11 +986,7 @@ camellia_cbc_dec_16way: vpslldq $8, tmp, tmp; \ vpsubq tmp, x, x; -.align 8 -.global camellia_ctr_16way -.type camellia_ctr_16way,@function; - -camellia_ctr_16way: +ENTRY(camellia_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1100,3 +1089,4 @@ camellia_ctr_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ctr_16way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 0b33743..310319c 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include <linux/linkage.h> + .file "camellia-x86_64-asm_64.S" .text @@ -188,10 +190,7 @@ bswapq RAB0; \ movq RAB0, 4*2(RIO); -.global __camellia_enc_blk; -.type __camellia_enc_blk,@function; - -__camellia_enc_blk: +ENTRY(__camellia_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -214,33 +213,31 @@ __camellia_enc_blk: movl $24, RT1d; /* max */ cmpb $16, key_length(CTX); - je __enc_done; + je .L__enc_done; enc_fls(24); enc_rounds(24); movl $32, RT1d; /* max */ -__enc_done: +.L__enc_done: testb RXORbl, RXORbl; movq RDST, RIO; - jnz __enc_xor; + jnz .L__enc_xor; enc_outunpack(mov, RT1); movq RRBP, %rbp; ret; -__enc_xor: +.L__enc_xor: enc_outunpack(xor, RT1); movq RRBP, %rbp; ret; +ENDPROC(__camellia_enc_blk) -.global camellia_dec_blk; -.type camellia_dec_blk,@function; - -camellia_dec_blk: +ENTRY(camellia_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -258,12 +255,12 @@ camellia_dec_blk: dec_inpack(RT2); cmpb $24, RT2bl; - je __dec_rounds16; + je .L__dec_rounds16; dec_rounds(24); dec_fls(24); -__dec_rounds16: +.L__dec_rounds16: dec_rounds(16); dec_fls(16); dec_rounds(8); @@ -276,6 +273,7 @@ __dec_rounds16: movq RRBP, %rbp; ret; +ENDPROC(camellia_dec_blk) /********************************************************************** 2-way camellia @@ -426,10 +424,7 @@ __dec_rounds16: bswapq RAB1; \ movq RAB1, 12*2(RIO); -.global __camellia_enc_blk_2way; -.type __camellia_enc_blk_2way,@function; - -__camellia_enc_blk_2way: +ENTRY(__camellia_enc_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -453,16 +448,16 @@ __camellia_enc_blk_2way: movl $24, RT2d; /* max */ cmpb $16, key_length(CTX); - je __enc2_done; + je .L__enc2_done; enc_fls2(24); enc_rounds2(24); movl $32, RT2d; /* max */ -__enc2_done: +.L__enc2_done: test RXORbl, RXORbl; movq RDST, RIO; - jnz __enc2_xor; + jnz .L__enc2_xor; enc_outunpack2(mov, RT2); @@ -470,17 +465,15 @@ __enc2_done: popq %rbx; ret; -__enc2_xor: +.L__enc2_xor: enc_outunpack2(xor, RT2); movq RRBP, %rbp; popq %rbx; ret; +ENDPROC(__camellia_enc_blk_2way) -.global camellia_dec_blk_2way; -.type camellia_dec_blk_2way,@function; - -camellia_dec_blk_2way: +ENTRY(camellia_dec_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -499,12 +492,12 @@ camellia_dec_blk_2way: dec_inpack2(RT2); cmpb $24, RT2bl; - je __dec2_rounds16; + je .L__dec2_rounds16; dec_rounds2(24); dec_fls2(24); -__dec2_rounds16: +.L__dec2_rounds16: dec_rounds2(16); dec_fls2(16); dec_rounds2(8); @@ -518,3 +511,4 @@ __dec2_rounds16: movq RRBP, %rbp; movq RXOR, %rbx; ret; +ENDPROC(camellia_dec_blk_2way) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 15b00ac..c35fd5d 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -23,6 +23,8 @@ * */ +#include <linux/linkage.h> + .file "cast5-avx-x86_64-asm_64.S" .extern cast_s1 @@ -211,8 +213,6 @@ .text .align 16 -.type __cast5_enc_blk16,@function; - __cast5_enc_blk16: /* input: * %rdi: ctx, CTX @@ -263,14 +263,14 @@ __cast5_enc_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_enc; + jnz .L__skip_enc; round(RL, RR, 12, 1); round(RR, RL, 13, 2); round(RL, RR, 14, 3); round(RR, RL, 15, 1); -__skip_enc: +.L__skip_enc: popq %rbx; popq %rbp; @@ -282,10 +282,9 @@ __skip_enc: outunpack_blocks(RR4, RL4, RTMP, RX, RKM); ret; +ENDPROC(__cast5_enc_blk16) .align 16 -.type __cast5_dec_blk16,@function; - __cast5_dec_blk16: /* input: * %rdi: ctx, CTX @@ -323,14 +322,14 @@ __cast5_dec_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_dec; + jnz .L__skip_dec; round(RL, RR, 15, 1); round(RR, RL, 14, 3); round(RL, RR, 13, 2); round(RR, RL, 12, 1); -__dec_tail: +.L__dec_tail: round(RL, RR, 11, 3); round(RR, RL, 10, 2); round(RL, RR, 9, 1); @@ -355,15 +354,12 @@ __dec_tail: ret; -__skip_dec: +.L__skip_dec: vpsrldq $4, RKR, RKR; - jmp __dec_tail; + jmp .L__dec_tail; +ENDPROC(__cast5_dec_blk16) -.align 16 -.global cast5_ecb_enc_16way -.type cast5_ecb_enc_16way,@function; - -cast5_ecb_enc_16way: +ENTRY(cast5_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +389,9 @@ cast5_ecb_enc_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_enc_16way) -.align 16 -.global cast5_ecb_dec_16way -.type cast5_ecb_dec_16way,@function; - -cast5_ecb_dec_16way: +ENTRY(cast5_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -428,12 +421,9 @@ cast5_ecb_dec_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_dec_16way) -.align 16 -.global cast5_cbc_dec_16way -.type cast5_cbc_dec_16way,@function; - -cast5_cbc_dec_16way: +ENTRY(cast5_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -480,12 +470,9 @@ cast5_cbc_dec_16way: popq %r12; ret; +ENDPROC(cast5_cbc_dec_16way) -.align 16 -.global cast5_ctr_16way -.type cast5_ctr_16way,@function; - -cast5_ctr_16way: +ENTRY(cast5_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -556,3 +543,4 @@ cast5_ctr_16way: popq %r12; ret; +ENDPROC(cast5_ctr_16way) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 2569d0d..f93b610 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include <linux/linkage.h> #include "glue_helper-asm-avx.S" .file "cast6-avx-x86_64-asm_64.S" @@ -250,8 +251,6 @@ .text .align 8 -.type __cast6_enc_blk8,@function; - __cast6_enc_blk8: /* input: * %rdi: ctx, CTX @@ -295,10 +294,9 @@ __cast6_enc_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_enc_blk8) .align 8 -.type __cast6_dec_blk8,@function; - __cast6_dec_blk8: /* input: * %rdi: ctx, CTX @@ -341,12 +339,9 @@ __cast6_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_dec_blk8) -.align 8 -.global cast6_ecb_enc_8way -.type cast6_ecb_enc_8way,@function; - -cast6_ecb_enc_8way: +ENTRY(cast6_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -362,12 +357,9 @@ cast6_ecb_enc_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_enc_8way) -.align 8 -.global cast6_ecb_dec_8way -.type cast6_ecb_dec_8way,@function; - -cast6_ecb_dec_8way: +ENTRY(cast6_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -383,12 +375,9 @@ cast6_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_dec_8way) -.align 8 -.global cast6_cbc_dec_8way -.type cast6_cbc_dec_8way,@function; - -cast6_cbc_dec_8way: +ENTRY(cast6_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -409,12 +398,9 @@ cast6_cbc_dec_8way: popq %r12; ret; +ENDPROC(cast6_cbc_dec_8way) -.align 8 -.global cast6_ctr_8way -.type cast6_ctr_8way,@function; - -cast6_ctr_8way: +ENTRY(cast6_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -437,3 +423,4 @@ cast6_ctr_8way: popq %r12; ret; +ENDPROC(cast6_ctr_8way) diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S new file mode 100644 index 0000000..c8335014 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -0,0 +1,246 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. + * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com> + * Alexander Boyko <Alexander_Boyko@xyratex.com> + */ + +#include <linux/linkage.h> +#include <asm/inst.h> + + +.align 16 +/* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ +.Lconstant_R2R1: + .octa 0x00000001c6e415960000000154442bd4 +/* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ +.Lconstant_R4R3: + .octa 0x00000000ccaa009e00000001751997d0 +/* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ +.Lconstant_R5: + .octa 0x00000000000000000000000163cd6124 +.Lconstant_mask32: + .octa 0x000000000000000000000000FFFFFFFF +/* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ +.Lconstant_RUpoly: + .octa 0x00000001F701164100000001DB710641 + +#define CONSTANT %xmm0 + +#ifdef __x86_64__ +#define BUF %rdi +#define LEN %rsi +#define CRC %edx +#else +#define BUF %eax +#define LEN %edx +#define CRC %ecx +#endif + + + +.text +/** + * Calculate crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pclmul_le_16(unsigned char const *buffer, + * size_t len, uint crc32) + */ +.globl crc32_pclmul_le_16 +.align 4, 0x90 +crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ + movdqa (BUF), %xmm1 + movdqa 0x10(BUF), %xmm2 + movdqa 0x20(BUF), %xmm3 + movdqa 0x30(BUF), %xmm4 + movd CRC, CONSTANT + pxor CONSTANT, %xmm1 + sub $0x40, LEN + add $0x40, BUF +#ifndef __x86_64__ + /* This is for position independent code(-fPIC) support for 32bit */ + call delta +delta: + pop %ecx +#endif + cmp $0x40, LEN + jb less_64 + +#ifdef __x86_64__ + movdqa .Lconstant_R2R1(%rip), CONSTANT +#else + movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT +#endif + +loop_64:/* 64 bytes Full cache line folding */ + prefetchnta 0x40(BUF) + movdqa %xmm1, %xmm5 + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 +#ifdef __x86_64__ + movdqa %xmm4, %xmm8 +#endif + PCLMULQDQ 00, CONSTANT, %xmm1 + PCLMULQDQ 00, CONSTANT, %xmm2 + PCLMULQDQ 00, CONSTANT, %xmm3 +#ifdef __x86_64__ + PCLMULQDQ 00, CONSTANT, %xmm4 +#endif + PCLMULQDQ 0x11, CONSTANT, %xmm5 + PCLMULQDQ 0x11, CONSTANT, %xmm6 + PCLMULQDQ 0x11, CONSTANT, %xmm7 +#ifdef __x86_64__ + PCLMULQDQ 0x11, CONSTANT, %xmm8 +#endif + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 +#ifdef __x86_64__ + pxor %xmm8, %xmm4 +#else + /* xmm8 unsupported for x32 */ + movdqa %xmm4, %xmm5 + PCLMULQDQ 00, CONSTANT, %xmm4 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm4 +#endif + + pxor (BUF), %xmm1 + pxor 0x10(BUF), %xmm2 + pxor 0x20(BUF), %xmm3 + pxor 0x30(BUF), %xmm4 + + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jge loop_64 +less_64:/* Folding cache line into 128bit */ +#ifdef __x86_64__ + movdqa .Lconstant_R4R3(%rip), CONSTANT +#else + movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT +#endif + prefetchnta (BUF) + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm2, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm3, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + + cmp $0x10, LEN + jb fold_64 +loop_16:/* Folding rest buffer into 128bit */ + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor (BUF), %xmm1 + sub $0x10, LEN + add $0x10, BUF + cmp $0x10, LEN + jge loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ + psrldq $0x08, %xmm1 + pxor CONSTANT, %xmm1 + + /* final 32-bit fold */ + movdqa %xmm1, %xmm2 +#ifdef __x86_64__ + movdqa .Lconstant_R5(%rip), CONSTANT + movdqa .Lconstant_mask32(%rip), %xmm3 +#else + movdqa .Lconstant_R5 - delta(%ecx), CONSTANT + movdqa .Lconstant_mask32 - delta(%ecx), %xmm3 +#endif + psrldq $0x04, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ +#ifdef __x86_64__ + movdqa .Lconstant_RUpoly(%rip), CONSTANT +#else + movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT +#endif + movdqa %xmm1, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x10, CONSTANT, %xmm1 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + + ret diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c new file mode 100644 index 0000000..9d014a7 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -0,0 +1,201 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation. + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/crc32.h> +#include <crypto/internal/hash.h> + +#include <asm/cpufeature.h> +#include <asm/cpu_device_id.h> +#include <asm/i387.h> + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); + +static u32 __attribute__((pure)) + crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(p, iquotient, crc); + kernel_fpu_end(); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} + +static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pclmul_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_pclmul_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); + return 0; +} + +static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32_pclmul_setkey, + .init = crc32_pclmul_init, + .update = crc32_pclmul_update, + .final = crc32_pclmul_final, + .finup = crc32_pclmul_finup, + .digest = crc32_pclmul_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + } +}; + +static const struct x86_cpu_id crc32pclmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); + + +static int __init crc32_pclmul_mod_init(void) +{ + + if (!x86_match_cpu(crc32pclmul_cpu_id)) { + pr_info("PCLMULQDQ-NI instructions are not detected.\n"); + return -ENODEV; + } + return crypto_register_shash(&alg); +} + +static void __exit crc32_pclmul_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32_pclmul_mod_init); +module_exit(crc32_pclmul_mod_fini); + +MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crc32"); +MODULE_ALIAS("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 93c6d39..cf1a7ec 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -42,6 +42,8 @@ * SOFTWARE. */ +#include <linux/linkage.h> + ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction .macro LABEL prefix n @@ -68,8 +70,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); -.global crc_pcl -crc_pcl: +ENTRY(crc_pcl) #define bufp %rdi #define bufp_dw %edi #define bufp_w %di @@ -323,6 +324,9 @@ JMPTBL_ENTRY %i .noaltmacro i=i+1 .endr + +ENDPROC(crc_pcl) + ################################################################ ## PCLMULQDQ tables ## Table is 128 entries x 2 quad words each diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1eb7f90..586f41a 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -94,6 +94,7 @@ __clmul_gf128mul_ble: pxor T2, T1 pxor T1, DATA ret +ENDPROC(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const be128 *shash) */ ENTRY(clmul_ghash_mul) @@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul) PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) ret +ENDPROC(clmul_ghash_mul) /* * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, @@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: ret +ENDPROC(clmul_ghash_update) /* * void clmul_ghash_setkey(be128 *shash, const u8 *key); @@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey) pxor %xmm1, %xmm0 movups %xmm0, (%rdi) ret +ENDPROC(clmul_ghash_setkey) diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S index 72eb306..329452b8 100644 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -2,11 +2,12 @@ # D. J. Bernstein # Public domain. -# enter ECRYPT_encrypt_bytes +#include <linux/linkage.h> + .text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %esp,%eax and $31,%eax add $256,%eax @@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes: add $64,%esi # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1060,11 +1060,10 @@ ECRYPT_keysetup: # leave add %eax,%esp ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1112,3 +1111,4 @@ ECRYPT_ivsetup: # leave add %eax,%esp ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b..9279e0b 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -1,8 +1,7 @@ -# enter ECRYPT_encrypt_bytes -.text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: +#include <linux/linkage.h> + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes: # comment:fp stack unchanged by jump # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -892,11 +890,10 @@ ECRYPT_keysetup: mov %rdi,%rax mov %rsi,%rdx ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -918,3 +915,4 @@ ECRYPT_ivsetup: mov %rdi,%rax mov %rsi,%rdx ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index a3a3c02..5e8e677 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -26,11 +26,6 @@ #define SALSA20_MIN_KEY_SIZE 16U #define SALSA20_MAX_KEY_SIZE 32U -// use the ECRYPT_* function names -#define salsa20_keysetup ECRYPT_keysetup -#define salsa20_ivsetup ECRYPT_ivsetup -#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes - struct salsa20_ctx { u32 input[16]; diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 02b0e9f..43c9386 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -24,6 +24,7 @@ * */ +#include <linux/linkage.h> #include "glue_helper-asm-avx.S" .file "serpent-avx-x86_64-asm_64.S" @@ -566,8 +567,6 @@ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -.type __serpent_enc_blk8_avx,@function; - __serpent_enc_blk8_avx: /* input: * %rdi: ctx, CTX @@ -619,10 +618,9 @@ __serpent_enc_blk8_avx: write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk8_avx) .align 8 -.type __serpent_dec_blk8_avx,@function; - __serpent_dec_blk8_avx: /* input: * %rdi: ctx, CTX @@ -674,12 +672,9 @@ __serpent_dec_blk8_avx: write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_dec_blk8_avx) -.align 8 -.global serpent_ecb_enc_8way_avx -.type serpent_ecb_enc_8way_avx,@function; - -serpent_ecb_enc_8way_avx: +ENTRY(serpent_ecb_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx: store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ecb_enc_8way_avx) -.align 8 -.global serpent_ecb_dec_8way_avx -.type serpent_ecb_dec_8way_avx,@function; - -serpent_ecb_dec_8way_avx: +ENTRY(serpent_ecb_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx: store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_ecb_dec_8way_avx) -.align 8 -.global serpent_cbc_dec_8way_avx -.type serpent_cbc_dec_8way_avx,@function; - -serpent_cbc_dec_8way_avx: +ENTRY(serpent_cbc_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx: store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_cbc_dec_8way_avx) -.align 8 -.global serpent_ctr_8way_avx -.type serpent_ctr_8way_avx,@function; - -serpent_ctr_8way_avx: +ENTRY(serpent_ctr_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -752,3 +738,4 @@ serpent_ctr_8way_avx: store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ctr_8way_avx) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index c00053d..d348f15 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -24,6 +24,8 @@ * */ +#include <linux/linkage.h> + .file "serpent-sse2-i586-asm_32.S" .text @@ -510,11 +512,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_4way -.type __serpent_enc_blk_4way,@function; - -__serpent_enc_blk_4way: +ENTRY(__serpent_enc_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -566,22 +564,19 @@ __serpent_enc_blk_4way: movl arg_dst(%esp), %eax; cmpb $0, arg_xor(%esp); - jnz __enc_xor4; + jnz .L__enc_xor4; write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; -__enc_xor4: +.L__enc_xor4: xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; +ENDPROC(__serpent_enc_blk_4way) -.align 8 -.global serpent_dec_blk_4way -.type serpent_dec_blk_4way,@function; - -serpent_dec_blk_4way: +ENTRY(serpent_dec_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -633,3 +628,4 @@ serpent_dec_blk_4way: write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); ret; +ENDPROC(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 3ee1ff0..acc066c 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -24,6 +24,8 @@ * */ +#include <linux/linkage.h> + .file "serpent-sse2-x86_64-asm_64.S" .text @@ -632,11 +634,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; - -__serpent_enc_blk_8way: +ENTRY(__serpent_enc_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -687,24 +685,21 @@ __serpent_enc_blk_8way: leaq (4*4*4)(%rsi), %rax; testb %cl, %cl; - jnz __enc_xor8; + jnz .L__enc_xor8; write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -__enc_xor8: +.L__enc_xor8: xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk_8way) -.align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; - -serpent_dec_blk_8way: +ENTRY(serpent_dec_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -756,3 +751,4 @@ serpent_dec_blk_8way: write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(serpent_dec_blk_8way) diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 49d6987..a410950 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -28,6 +28,8 @@ * (at your option) any later version. */ +#include <linux/linkage.h> + #define CTX %rdi // arg1 #define BUF %rsi // arg2 #define CNT %rdx // arg3 @@ -69,10 +71,8 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - .global \name - .type \name, @function - .align 32 -\name: + ENTRY(\name) + push %rbx push %rbp push %r12 @@ -106,7 +106,7 @@ pop %rbx ret - .size \name, .-\name + ENDPROC(\name) .endm /* diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index ebac16b..8d3e113 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include <linux/linkage.h> #include "glue_helper-asm-avx.S" .file "twofish-avx-x86_64-asm_64.S" @@ -243,8 +244,6 @@ vpxor x3, wkey, x3; .align 8 -.type __twofish_enc_blk8,@function; - __twofish_enc_blk8: /* input: * %rdi: ctx, CTX @@ -284,10 +283,9 @@ __twofish_enc_blk8: outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_enc_blk8) .align 8 -.type __twofish_dec_blk8,@function; - __twofish_dec_blk8: /* input: * %rdi: ctx, CTX @@ -325,12 +323,9 @@ __twofish_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_dec_blk8) -.align 8 -.global twofish_ecb_enc_8way -.type twofish_ecb_enc_8way,@function; - -twofish_ecb_enc_8way: +ENTRY(twofish_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -346,12 +341,9 @@ twofish_ecb_enc_8way: store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); ret; +ENDPROC(twofish_ecb_enc_8way) -.align 8 -.global twofish_ecb_dec_8way -.type twofish_ecb_dec_8way,@function; - -twofish_ecb_dec_8way: +ENTRY(twofish_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -367,12 +359,9 @@ twofish_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(twofish_ecb_dec_8way) -.align 8 -.global twofish_cbc_dec_8way -.type twofish_cbc_dec_8way,@function; - -twofish_cbc_dec_8way: +ENTRY(twofish_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +382,9 @@ twofish_cbc_dec_8way: popq %r12; ret; +ENDPROC(twofish_cbc_dec_8way) -.align 8 -.global twofish_ctr_8way -.type twofish_ctr_8way,@function; - -twofish_ctr_8way: +ENTRY(twofish_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -421,3 +407,4 @@ twofish_ctr_8way: popq %r12; ret; +ENDPROC(twofish_ctr_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 658af4b..694ea45 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -20,6 +20,7 @@ .file "twofish-i586-asm.S" .text +#include <linux/linkage.h> #include <asm/asm-offsets.h> /* return address at 0 */ @@ -219,11 +220,7 @@ xor %esi, d ## D;\ ror $1, d ## D; -.align 4 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -277,8 +274,9 @@ twofish_enc_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -333,3 +331,4 @@ twofish_dec_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 5b012a2..1c3b7ce 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -20,6 +20,8 @@ * */ +#include <linux/linkage.h> + .file "twofish-x86_64-asm-3way.S" .text @@ -214,11 +216,7 @@ rorq $32, RAB2; \ outunpack3(mov, RIO, 2, RAB, 2); -.align 8 -.global __twofish_enc_blk_3way -.type __twofish_enc_blk_3way,@function; - -__twofish_enc_blk_3way: +ENTRY(__twofish_enc_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -250,7 +248,7 @@ __twofish_enc_blk_3way: popq %rbp; /* bool xor */ testb %bpl, %bpl; - jnz __enc_xor3; + jnz .L__enc_xor3; outunpack_enc3(mov); @@ -262,7 +260,7 @@ __twofish_enc_blk_3way: popq %r15; ret; -__enc_xor3: +.L__enc_xor3: outunpack_enc3(xor); popq %rbx; @@ -272,11 +270,9 @@ __enc_xor3: popq %r14; popq %r15; ret; +ENDPROC(__twofish_enc_blk_3way) -.global twofish_dec_blk_3way -.type twofish_dec_blk_3way,@function; - -twofish_dec_blk_3way: +ENTRY(twofish_dec_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -313,4 +309,4 @@ twofish_dec_blk_3way: popq %r14; popq %r15; ret; - +ENDPROC(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fc..a039d21 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -20,6 +20,7 @@ .file "twofish-x86_64-asm.S" .text +#include <linux/linkage.h> #include <asm/asm-offsets.h> #define a_offset 0 @@ -214,11 +215,7 @@ xor %r8d, d ## D;\ ror $1, d ## D; -.align 8 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) pushq R1 /* %rdi contains the ctx address */ @@ -269,8 +266,9 @@ twofish_enc_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) pushq R1 /* %rdi contains the ctx address */ @@ -320,3 +318,4 @@ twofish_dec_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_dec_blk) diff --git a/crypto/Kconfig b/crypto/Kconfig index 0880a14..05c0ce5 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -353,6 +353,27 @@ config CRYPTO_CRC32C_SPARC64 CRC32c CRC algorithm implemented using sparc64 crypto instructions, when available. +config CRYPTO_CRC32 + tristate "CRC32 CRC algorithm" + select CRYPTO_HASH + select CRC32 + help + CRC-32-IEEE 802.3 cyclic redundancy-check algorithm. + Shash crypto api wrappers to crc32_le function. + +config CRYPTO_CRC32_PCLMUL + tristate "CRC32 PCLMULQDQ hardware acceleration" + depends on X86 + select CRYPTO_HASH + select CRC32 + help + From Intel Westmere and AMD Bulldozer processor with SSE4.2 + and PCLMULQDQ supported, the processor will support + CRC32 PCLMULQDQ implementation using hardware accelerated PCLMULQDQ + instruction. This option will create 'crc32-plcmul' module, + which will enable any routine to use the CRC-32-IEEE 802.3 checksum + and gain better performance as compared with the table implementation. + config CRYPTO_GHASH tristate "GHASH digest algorithm" select CRYPTO_GF128MUL diff --git a/crypto/Makefile b/crypto/Makefile index d59dec7..be1a1be 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o +obj-$(CONFIG_CRYPTO_CRC32) += crc32.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_842) += 842.o diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 533de95..7d4a8d2 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -388,9 +388,9 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_blkcipher rblkcipher; - snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "ablkcipher"); - snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s", - alg->cra_ablkcipher.geniv ?: "<default>"); + strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type)); + strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<default>", + sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; @@ -469,9 +469,9 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_blkcipher rblkcipher; - snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "givcipher"); - snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s", - alg->cra_ablkcipher.geniv ?: "<built-in>"); + strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type)); + strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<built-in>", + sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize; diff --git a/crypto/aead.c b/crypto/aead.c index 0b8121e..547491e 100644 --- a/crypto/aead.c +++ b/crypto/aead.c @@ -117,9 +117,8 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_report_aead raead; struct aead_alg *aead = &alg->cra_aead; - snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "aead"); - snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s", - aead->geniv ?: "<built-in>"); + strncpy(raead.type, "aead", sizeof(raead.type)); + strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv)); raead.blocksize = alg->cra_blocksize; raead.maxauthsize = aead->maxauthsize; @@ -203,8 +202,8 @@ static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_report_aead raead; struct aead_alg *aead = &alg->cra_aead; - snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "nivaead"); - snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s", aead->geniv); + strncpy(raead.type, "nivaead", sizeof(raead.type)); + strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv)); raead.blocksize = alg->cra_blocksize; raead.maxauthsize = aead->maxauthsize; @@ -282,18 +281,16 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl, int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) & algt->mask) return ERR_PTR(-EINVAL); name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(name); if (IS_ERR(name)) - return ERR_PTR(err); + return ERR_CAST(name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) diff --git a/crypto/ahash.c b/crypto/ahash.c index 3887856..793a27f 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -404,7 +404,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_hash rhash; - snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "ahash"); + strncpy(rhash.type, "ahash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize; diff --git a/crypto/algapi.c b/crypto/algapi.c index c3b9bfe..08c57c8 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -749,12 +749,10 @@ struct crypto_alg *crypto_attr_alg2(struct rtattr *rta, u32 type, u32 mask) { const char *name; - int err; name = crypto_attr_alg_name(rta); - err = PTR_ERR(name); if (IS_ERR(name)) - return ERR_PTR(err); + return ERR_CAST(name); return crypto_find_alg(name, frontend, type, mask); } diff --git a/crypto/authenc.c b/crypto/authenc.c index d0583a4..ffce19d 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -592,9 +592,8 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); diff --git a/crypto/authencesn.c b/crypto/authencesn.c index 136b68b..ab53762 100644 --- a/crypto/authencesn.c +++ b/crypto/authencesn.c @@ -715,9 +715,8 @@ static struct crypto_instance *crypto_authenc_esn_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a8d85a1..a79e7e9 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -499,9 +499,9 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_blkcipher rblkcipher; - snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "blkcipher"); - snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s", - alg->cra_blkcipher.geniv ?: "<default>"); + strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type)); + strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "<default>", + sizeof(rblkcipher.geniv)); rblkcipher.blocksize = alg->cra_blocksize; rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize; @@ -588,18 +588,16 @@ struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl, int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) & algt->mask) return ERR_PTR(-EINVAL); name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(name); if (IS_ERR(name)) - return ERR_PTR(err); + return ERR_CAST(name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) diff --git a/crypto/ccm.c b/crypto/ccm.c index 32fe1bb..499c917 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -484,18 +484,16 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb, int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); cipher = crypto_alg_mod_lookup(cipher_name, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); - err = PTR_ERR(cipher); if (IS_ERR(cipher)) - return ERR_PTR(err); + return ERR_CAST(cipher); err = -EINVAL; if (cipher->cra_blocksize != 16) @@ -573,15 +571,13 @@ out_put_cipher: static struct crypto_instance *crypto_ccm_alloc(struct rtattr **tb) { - int err; const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(cipher_name); if (IS_ERR(cipher_name)) - return ERR_PTR(err); + return ERR_CAST(cipher_name); if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -612,20 +608,17 @@ static struct crypto_template crypto_ccm_tmpl = { static struct crypto_instance *crypto_ccm_base_alloc(struct rtattr **tb) { - int err; const char *ctr_name; const char *cipher_name; char full_name[CRYPTO_MAX_ALG_NAME]; ctr_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(ctr_name); if (IS_ERR(ctr_name)) - return ERR_PTR(err); + return ERR_CAST(ctr_name); cipher_name = crypto_attr_alg_name(tb[2]); - err = PTR_ERR(cipher_name); if (IS_ERR(cipher_name)) - return ERR_PTR(err); + return ERR_CAST(cipher_name); if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)", ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -760,17 +753,15 @@ static struct crypto_instance *crypto_rfc4309_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); ccm_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(ccm_name); if (IS_ERR(ccm_name)) - return ERR_PTR(err); + return ERR_CAST(ccm_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) diff --git a/crypto/chainiv.c b/crypto/chainiv.c index ba200b0..834d8dd 100644 --- a/crypto/chainiv.c +++ b/crypto/chainiv.c @@ -291,9 +291,8 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); err = crypto_get_default_rng(); if (err) diff --git a/crypto/crc32.c b/crypto/crc32.c new file mode 100644 index 0000000..9d1c415 --- /dev/null +++ b/crypto/crc32.c @@ -0,0 +1,158 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + */ + +/* + * This is crypto api shash wrappers to crc32_le. + */ + +#include <linux/crc32.h> +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/string.h> +#include <linux/kernel.h> + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + return crc32_le(crc, p, len); +} + +/** No default init with ~0 */ +static int crc32_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + + +/* + * Setting the seed allows arbitrary accumulators and flexible XOR policy + * If your algorithm starts with ~0, then XOR with ~0 before you set + * the seed. + */ +static int crc32_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = __crc32_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len)); + return 0; +} + +static int crc32_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} +static struct shash_alg alg = { + .setkey = crc32_setkey, + .init = crc32_init, + .update = crc32_update, + .final = crc32_final, + .finup = crc32_finup, + .digest = crc32_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-table", + .cra_priority = 100, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_cra_init, + } +}; + +static int __init crc32_mod_init(void) +{ + return crypto_register_shash(&alg); +} + +static void __exit crc32_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32_mod_init); +module_exit(crc32_mod_fini); + +MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>"); +MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32"); +MODULE_LICENSE("GPL"); diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 35d700a..dfd511f 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -30,6 +30,8 @@ #include "internal.h" +#define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x)) + static DEFINE_MUTEX(crypto_cfg_mutex); /* The crypto netlink socket */ @@ -75,7 +77,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - snprintf(rcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "cipher"); + strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -94,8 +96,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - snprintf(rcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "compression"); - + strncpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -108,12 +109,14 @@ nla_put_failure: static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - memcpy(&ualg->cru_name, &alg->cra_name, sizeof(ualg->cru_name)); - memcpy(&ualg->cru_driver_name, &alg->cra_driver_name, - sizeof(ualg->cru_driver_name)); - memcpy(&ualg->cru_module_name, module_name(alg->cra_module), - CRYPTO_MAX_ALG_NAME); - + strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strncpy(ualg->cru_driver_name, alg->cra_driver_name, + sizeof(ualg->cru_driver_name)); + strncpy(ualg->cru_module_name, module_name(alg->cra_module), + sizeof(ualg->cru_module_name)); + + ualg->cru_type = 0; + ualg->cru_mask = 0; ualg->cru_flags = alg->cra_flags; ualg->cru_refcnt = atomic_read(&alg->cra_refcnt); @@ -122,8 +125,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - snprintf(rl.type, CRYPTO_MAX_ALG_NAME, "%s", "larval"); - + strncpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; @@ -196,7 +198,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct crypto_dump_info info; int err; - if (!p->cru_driver_name) + if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) + return -EINVAL; + + if (!p->cru_driver_name[0]) return -EINVAL; alg = crypto_alg_match(p, 1); @@ -260,6 +265,9 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; LIST_HEAD(list); + if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) + return -EINVAL; + if (priority && !strlen(p->cru_driver_name)) return -EINVAL; @@ -287,6 +295,9 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct crypto_alg *alg; struct crypto_user_alg *p = nlmsg_data(nlh); + if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) + return -EINVAL; + alg = crypto_alg_match(p, 1); if (!alg) return -ENOENT; @@ -368,6 +379,9 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh, struct crypto_user_alg *p = nlmsg_data(nlh); struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL]; + if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name)) + return -EINVAL; + if (strlen(p->cru_driver_name)) exact = 1; diff --git a/crypto/ctr.c b/crypto/ctr.c index 1f2997c..f2b94f2 100644 --- a/crypto/ctr.c +++ b/crypto/ctr.c @@ -343,17 +343,15 @@ static struct crypto_instance *crypto_rfc3686_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & algt->mask) return ERR_PTR(-EINVAL); cipher_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(cipher_name); if (IS_ERR(cipher_name)) - return ERR_PTR(err); + return ERR_CAST(cipher_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) diff --git a/crypto/cts.c b/crypto/cts.c index ccf9c5d..042223f 100644 --- a/crypto/cts.c +++ b/crypto/cts.c @@ -282,9 +282,8 @@ static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb) alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK); - err = PTR_ERR(alg); if (IS_ERR(alg)) - return ERR_PTR(err); + return ERR_CAST(alg); inst = ERR_PTR(-EINVAL); if (!is_power_of_2(alg->cra_blocksize)) diff --git a/crypto/gcm.c b/crypto/gcm.c index 1a25263..137ad1e 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -701,9 +701,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb, int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); @@ -711,9 +710,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb, ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type, CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_AHASH_MASK); - err = PTR_ERR(ghash_alg); if (IS_ERR(ghash_alg)) - return ERR_PTR(err); + return ERR_CAST(ghash_alg); err = -ENOMEM; inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL); @@ -787,15 +785,13 @@ out_put_ghash: static struct crypto_instance *crypto_gcm_alloc(struct rtattr **tb) { - int err; const char *cipher_name; char ctr_name[CRYPTO_MAX_ALG_NAME]; char full_name[CRYPTO_MAX_ALG_NAME]; cipher_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(cipher_name); if (IS_ERR(cipher_name)) - return ERR_PTR(err); + return ERR_CAST(cipher_name); if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", cipher_name) >= CRYPTO_MAX_ALG_NAME) @@ -826,20 +822,17 @@ static struct crypto_template crypto_gcm_tmpl = { static struct crypto_instance *crypto_gcm_base_alloc(struct rtattr **tb) { - int err; const char *ctr_name; const char *ghash_name; char full_name[CRYPTO_MAX_ALG_NAME]; ctr_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(ctr_name); if (IS_ERR(ctr_name)) - return ERR_PTR(err); + return ERR_CAST(ctr_name); ghash_name = crypto_attr_alg_name(tb[2]); - err = PTR_ERR(ghash_name); if (IS_ERR(ghash_name)) - return ERR_PTR(err); + return ERR_CAST(ghash_name); if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)", ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME) @@ -971,17 +964,15 @@ static struct crypto_instance *crypto_rfc4106_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); ccm_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(ccm_name); if (IS_ERR(ccm_name)) - return ERR_PTR(err); + return ERR_CAST(ccm_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) @@ -1222,17 +1213,15 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask) return ERR_PTR(-EINVAL); ccm_name = crypto_attr_alg_name(tb[1]); - err = PTR_ERR(ccm_name); if (IS_ERR(ccm_name)) - return ERR_PTR(err); + return ERR_CAST(ccm_name); inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) diff --git a/crypto/pcompress.c b/crypto/pcompress.c index 04e083f..7140fe7 100644 --- a/crypto/pcompress.c +++ b/crypto/pcompress.c @@ -53,8 +53,7 @@ static int crypto_pcomp_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rpcomp; - snprintf(rpcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "pcomp"); - + strncpy(rpcomp.type, "pcomp", sizeof(rpcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rpcomp)) goto nla_put_failure; diff --git a/crypto/rng.c b/crypto/rng.c index f3b7894..e0a25c2 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -65,7 +65,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_rng rrng; - snprintf(rrng.type, CRYPTO_MAX_ALG_NAME, "%s", "rng"); + strncpy(rrng.type, "rng", sizeof(rrng.type)); rrng.seedsize = alg->cra_rng.seedsize; diff --git a/crypto/seqiv.c b/crypto/seqiv.c index 4c44912..f2cba4ed 100644 --- a/crypto/seqiv.c +++ b/crypto/seqiv.c @@ -305,9 +305,8 @@ static struct crypto_instance *seqiv_alloc(struct rtattr **tb) int err; algt = crypto_get_attr_type(tb); - err = PTR_ERR(algt); if (IS_ERR(algt)) - return ERR_PTR(err); + return ERR_CAST(algt); err = crypto_get_default_rng(); if (err) diff --git a/crypto/shash.c b/crypto/shash.c index f426330f..929058a 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -530,7 +530,8 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_report_hash rhash; struct shash_alg *salg = __crypto_shash_alg(alg); - snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "shash"); + strncpy(rhash.type, "shash", sizeof(rhash.type)); + rhash.blocksize = alg->cra_blocksize; rhash.digestsize = salg->digestsize; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index edf4a08..efd8b20 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2269,6 +2269,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "ecb(fcrypt)", + .test = alg_test_skcipher, + .suite = { + .cipher = { + .enc = { + .vecs = fcrypt_pcbc_enc_tv_template, + .count = 1 + }, + .dec = { + .vecs = fcrypt_pcbc_dec_tv_template, + .count = 1 + } + } + } + }, { .alg = "ecb(khazad)", .test = alg_test_skcipher, .suite = { diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index c9d9d5c..6f22ba5 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -332,7 +332,7 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd) return -EINVAL; dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg); - if (!dd->nb_in_sg) + if (!dd->nb_out_sg) return -EINVAL; dd->bufcnt = sg_copy_to_buffer(dd->in_sg, dd->nb_in_sg, diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index a22f1a9..827913d 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -694,7 +694,7 @@ out_error_dma: dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma); free_dma(crc->dma_ch); out_error_irq: - free_irq(crc->irq, crc->dev); + free_irq(crc->irq, crc); out_error_unmap: iounmap((void *)crc->regs); out_error_free_mem: @@ -720,10 +720,10 @@ static int bfin_crypto_crc_remove(struct platform_device *pdev) crypto_unregister_ahash(&algs); tasklet_kill(&crc->done_task); - iounmap((void *)crc->regs); free_dma(crc->dma_ch); if (crc->irq > 0) - free_irq(crc->irq, crc->dev); + free_irq(crc->irq, crc); + iounmap((void *)crc->regs); kfree(crc); return 0; diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index e66e8ee..6aa425f 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -5,6 +5,7 @@ * * Copyright (c) 2010 Nokia Corporation * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com> + * Copyright (c) 2011 Texas Instruments Incorporated * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as published @@ -19,28 +20,39 @@ #include <linux/init.h> #include <linux/errno.h> #include <linux/kernel.h> -#include <linux/clk.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> #include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/omap-dma.h> +#include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_address.h> #include <linux/io.h> #include <linux/crypto.h> #include <linux/interrupt.h> #include <crypto/scatterwalk.h> #include <crypto/aes.h> -#include <linux/omap-dma.h> +#define DST_MAXBURST 4 +#define DMA_MIN (DST_MAXBURST * sizeof(u32)) /* OMAP TRM gives bitfields as start:end, where start is the higher bit number. For example 7:0 */ #define FLD_MASK(start, end) (((1 << ((start) - (end) + 1)) - 1) << (end)) #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end)) -#define AES_REG_KEY(x) (0x1C - ((x ^ 0x01) * 0x04)) -#define AES_REG_IV(x) (0x20 + ((x) * 0x04)) +#define AES_REG_KEY(dd, x) ((dd)->pdata->key_ofs - \ + ((x ^ 0x01) * 0x04)) +#define AES_REG_IV(dd, x) ((dd)->pdata->iv_ofs + ((x) * 0x04)) -#define AES_REG_CTRL 0x30 -#define AES_REG_CTRL_CTR_WIDTH (1 << 7) +#define AES_REG_CTRL(dd) ((dd)->pdata->ctrl_ofs) +#define AES_REG_CTRL_CTR_WIDTH_MASK (3 << 7) +#define AES_REG_CTRL_CTR_WIDTH_32 (0 << 7) +#define AES_REG_CTRL_CTR_WIDTH_64 (1 << 7) +#define AES_REG_CTRL_CTR_WIDTH_96 (2 << 7) +#define AES_REG_CTRL_CTR_WIDTH_128 (3 << 7) #define AES_REG_CTRL_CTR (1 << 6) #define AES_REG_CTRL_CBC (1 << 5) #define AES_REG_CTRL_KEY_SIZE (3 << 3) @@ -48,14 +60,11 @@ #define AES_REG_CTRL_INPUT_READY (1 << 1) #define AES_REG_CTRL_OUTPUT_READY (1 << 0) -#define AES_REG_DATA 0x34 -#define AES_REG_DATA_N(x) (0x34 + ((x) * 0x04)) +#define AES_REG_DATA_N(dd, x) ((dd)->pdata->data_ofs + ((x) * 0x04)) -#define AES_REG_REV 0x44 -#define AES_REG_REV_MAJOR 0xF0 -#define AES_REG_REV_MINOR 0x0F +#define AES_REG_REV(dd) ((dd)->pdata->rev_ofs) -#define AES_REG_MASK 0x48 +#define AES_REG_MASK(dd) ((dd)->pdata->mask_ofs) #define AES_REG_MASK_SIDLE (1 << 6) #define AES_REG_MASK_START (1 << 5) #define AES_REG_MASK_DMA_OUT_EN (1 << 3) @@ -63,8 +72,7 @@ #define AES_REG_MASK_SOFTRESET (1 << 1) #define AES_REG_AUTOIDLE (1 << 0) -#define AES_REG_SYSSTATUS 0x4C -#define AES_REG_SYSSTATUS_RESETDONE (1 << 0) +#define AES_REG_LENGTH_N(x) (0x54 + ((x) * 0x04)) #define DEFAULT_TIMEOUT (5*HZ) @@ -72,6 +80,7 @@ #define FLAGS_ENCRYPT BIT(0) #define FLAGS_CBC BIT(1) #define FLAGS_GIV BIT(2) +#define FLAGS_CTR BIT(3) #define FLAGS_INIT BIT(4) #define FLAGS_FAST BIT(5) @@ -92,11 +101,39 @@ struct omap_aes_reqctx { #define OMAP_AES_QUEUE_LENGTH 1 #define OMAP_AES_CACHE_SIZE 0 +struct omap_aes_algs_info { + struct crypto_alg *algs_list; + unsigned int size; + unsigned int registered; +}; + +struct omap_aes_pdata { + struct omap_aes_algs_info *algs_info; + unsigned int algs_info_size; + + void (*trigger)(struct omap_aes_dev *dd, int length); + + u32 key_ofs; + u32 iv_ofs; + u32 ctrl_ofs; + u32 data_ofs; + u32 rev_ofs; + u32 mask_ofs; + + u32 dma_enable_in; + u32 dma_enable_out; + u32 dma_start; + + u32 major_mask; + u32 major_shift; + u32 minor_mask; + u32 minor_shift; +}; + struct omap_aes_dev { struct list_head list; unsigned long phys_base; void __iomem *io_base; - struct clk *iclk; struct omap_aes_ctx *ctx; struct device *dev; unsigned long flags; @@ -111,20 +148,24 @@ struct omap_aes_dev { struct ablkcipher_request *req; size_t total; struct scatterlist *in_sg; + struct scatterlist in_sgl; size_t in_offset; struct scatterlist *out_sg; + struct scatterlist out_sgl; size_t out_offset; size_t buflen; void *buf_in; size_t dma_size; int dma_in; - int dma_lch_in; + struct dma_chan *dma_lch_in; dma_addr_t dma_addr_in; void *buf_out; int dma_out; - int dma_lch_out; + struct dma_chan *dma_lch_out; dma_addr_t dma_addr_out; + + const struct omap_aes_pdata *pdata; }; /* keep registered devices data here */ @@ -160,19 +201,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset, omap_aes_write(dd, offset, *value); } -static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit) -{ - unsigned long timeout = jiffies + DEFAULT_TIMEOUT; - - while (!(omap_aes_read(dd, offset) & bit)) { - if (time_is_before_jiffies(timeout)) { - dev_err(dd->dev, "omap-aes timeout\n"); - return -ETIMEDOUT; - } - } - return 0; -} - static int omap_aes_hw_init(struct omap_aes_dev *dd) { /* @@ -180,23 +208,9 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) * It may be long delays between requests. * Device might go to off mode to save power. */ - clk_enable(dd->iclk); + pm_runtime_get_sync(dd->dev); if (!(dd->flags & FLAGS_INIT)) { - /* is it necessary to reset before every operation? */ - omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET, - AES_REG_MASK_SOFTRESET); - /* - * prevent OCP bus error (SRESP) in case an access to the module - * is performed while the module is coming out of soft reset - */ - __asm__ __volatile__("nop"); - __asm__ __volatile__("nop"); - - if (omap_aes_wait(dd, AES_REG_SYSSTATUS, - AES_REG_SYSSTATUS_RESETDONE)) - return -ETIMEDOUT; - dd->flags |= FLAGS_INIT; dd->err = 0; } @@ -208,59 +222,75 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd) { unsigned int key32; int i, err; - u32 val, mask; + u32 val, mask = 0; err = omap_aes_hw_init(dd); if (err) return err; - val = 0; - if (dd->dma_lch_out >= 0) - val |= AES_REG_MASK_DMA_OUT_EN; - if (dd->dma_lch_in >= 0) - val |= AES_REG_MASK_DMA_IN_EN; - - mask = AES_REG_MASK_DMA_IN_EN | AES_REG_MASK_DMA_OUT_EN; - - omap_aes_write_mask(dd, AES_REG_MASK, val, mask); - key32 = dd->ctx->keylen / sizeof(u32); /* it seems a key should always be set even if it has not changed */ for (i = 0; i < key32; i++) { - omap_aes_write(dd, AES_REG_KEY(i), + omap_aes_write(dd, AES_REG_KEY(dd, i), __le32_to_cpu(dd->ctx->key[i])); } - if ((dd->flags & FLAGS_CBC) && dd->req->info) - omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4); + if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info) + omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4); val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); if (dd->flags & FLAGS_CBC) val |= AES_REG_CTRL_CBC; + if (dd->flags & FLAGS_CTR) { + val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_32; + mask = AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_MASK; + } if (dd->flags & FLAGS_ENCRYPT) val |= AES_REG_CTRL_DIRECTION; - mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION | + mask |= AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION | AES_REG_CTRL_KEY_SIZE; - omap_aes_write_mask(dd, AES_REG_CTRL, val, mask); + omap_aes_write_mask(dd, AES_REG_CTRL(dd), val, mask); - /* IN */ - omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); + return 0; +} - omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); +static void omap_aes_dma_trigger_omap2(struct omap_aes_dev *dd, int length) +{ + u32 mask, val; - /* OUT */ - omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); + val = dd->pdata->dma_start; - omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + if (dd->dma_lch_out != NULL) + val |= dd->pdata->dma_enable_out; + if (dd->dma_lch_in != NULL) + val |= dd->pdata->dma_enable_in; + + mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in | + dd->pdata->dma_start; + + omap_aes_write_mask(dd, AES_REG_MASK(dd), val, mask); - return 0; +} + +static void omap_aes_dma_trigger_omap4(struct omap_aes_dev *dd, int length) +{ + omap_aes_write(dd, AES_REG_LENGTH_N(0), length); + omap_aes_write(dd, AES_REG_LENGTH_N(1), 0); + + omap_aes_dma_trigger_omap2(dd, length); +} + +static void omap_aes_dma_stop(struct omap_aes_dev *dd) +{ + u32 mask; + + mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in | + dd->pdata->dma_start; + + omap_aes_write_mask(dd, AES_REG_MASK(dd), 0, mask); } static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx) @@ -284,18 +314,10 @@ static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx) return dd; } -static void omap_aes_dma_callback(int lch, u16 ch_status, void *data) +static void omap_aes_dma_out_callback(void *data) { struct omap_aes_dev *dd = data; - if (ch_status != OMAP_DMA_BLOCK_IRQ) { - pr_err("omap-aes DMA error status: 0x%hx\n", ch_status); - dd->err = -EIO; - dd->flags &= ~FLAGS_INIT; /* request to re-initialize */ - } else if (lch == dd->dma_lch_in) { - return; - } - /* dma_lch_out - completed */ tasklet_schedule(&dd->done_task); } @@ -303,9 +325,10 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data) static int omap_aes_dma_init(struct omap_aes_dev *dd) { int err = -ENOMEM; + dma_cap_mask_t mask; - dd->dma_lch_out = -1; - dd->dma_lch_in = -1; + dd->dma_lch_out = NULL; + dd->dma_lch_in = NULL; dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE); dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE); @@ -334,23 +357,31 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd) goto err_map_out; } - err = omap_request_dma(dd->dma_in, "omap-aes-rx", - omap_aes_dma_callback, dd, &dd->dma_lch_in); - if (err) { - dev_err(dd->dev, "Unable to request DMA channel\n"); + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + dd->dma_lch_in = dma_request_slave_channel_compat(mask, + omap_dma_filter_fn, + &dd->dma_in, + dd->dev, "rx"); + if (!dd->dma_lch_in) { + dev_err(dd->dev, "Unable to request in DMA channel\n"); goto err_dma_in; } - err = omap_request_dma(dd->dma_out, "omap-aes-tx", - omap_aes_dma_callback, dd, &dd->dma_lch_out); - if (err) { - dev_err(dd->dev, "Unable to request DMA channel\n"); + + dd->dma_lch_out = dma_request_slave_channel_compat(mask, + omap_dma_filter_fn, + &dd->dma_out, + dd->dev, "tx"); + if (!dd->dma_lch_out) { + dev_err(dd->dev, "Unable to request out DMA channel\n"); goto err_dma_out; } return 0; err_dma_out: - omap_free_dma(dd->dma_lch_in); + dma_release_channel(dd->dma_lch_in); err_dma_in: dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, DMA_FROM_DEVICE); @@ -367,8 +398,8 @@ err_alloc: static void omap_aes_dma_cleanup(struct omap_aes_dev *dd) { - omap_free_dma(dd->dma_lch_out); - omap_free_dma(dd->dma_lch_in); + dma_release_channel(dd->dma_lch_out); + dma_release_channel(dd->dma_lch_in); dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen, DMA_FROM_DEVICE); dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, DMA_TO_DEVICE); @@ -426,12 +457,15 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf, return off; } -static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, - dma_addr_t dma_addr_out, int length) +static int omap_aes_crypt_dma(struct crypto_tfm *tfm, + struct scatterlist *in_sg, struct scatterlist *out_sg) { struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm); struct omap_aes_dev *dd = ctx->dd; - int len32; + struct dma_async_tx_descriptor *tx_in, *tx_out; + struct dma_slave_config cfg; + dma_addr_t dma_addr_in = sg_dma_address(in_sg); + int ret, length = sg_dma_len(in_sg); pr_debug("len: %d\n", length); @@ -441,30 +475,61 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, dma_sync_single_for_device(dd->dev, dma_addr_in, length, DMA_TO_DEVICE); - len32 = DIV_ROUND_UP(length, sizeof(u32)); + memset(&cfg, 0, sizeof(cfg)); + + cfg.src_addr = dd->phys_base + AES_REG_DATA_N(dd, 0); + cfg.dst_addr = dd->phys_base + AES_REG_DATA_N(dd, 0); + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.src_maxburst = DST_MAXBURST; + cfg.dst_maxburst = DST_MAXBURST; /* IN */ - omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32, - len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in, - OMAP_DMA_DST_SYNC); + ret = dmaengine_slave_config(dd->dma_lch_in, &cfg); + if (ret) { + dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n", + ret); + return ret; + } + + tx_in = dmaengine_prep_slave_sg(dd->dma_lch_in, in_sg, 1, + DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_in) { + dev_err(dd->dev, "IN prep_slave_sg() failed\n"); + return -EINVAL; + } - omap_set_dma_src_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_POST_INC, - dma_addr_in, 0, 0); + /* No callback necessary */ + tx_in->callback_param = dd; /* OUT */ - omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32, - len32, 1, OMAP_DMA_SYNC_PACKET, - dd->dma_out, OMAP_DMA_SRC_SYNC); + ret = dmaengine_slave_config(dd->dma_lch_out, &cfg); + if (ret) { + dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n", + ret); + return ret; + } + + tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, 1, + DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_out) { + dev_err(dd->dev, "OUT prep_slave_sg() failed\n"); + return -EINVAL; + } - omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC, - dma_addr_out, 0, 0); + tx_out->callback = omap_aes_dma_out_callback; + tx_out->callback_param = dd; - omap_start_dma(dd->dma_lch_in); - omap_start_dma(dd->dma_lch_out); + dmaengine_submit(tx_in); + dmaengine_submit(tx_out); - /* start DMA or disable idle mode */ - omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, - AES_REG_MASK_START); + dma_async_issue_pending(dd->dma_lch_in); + dma_async_issue_pending(dd->dma_lch_out); + + /* start DMA */ + dd->pdata->trigger(dd, length); return 0; } @@ -476,6 +541,8 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) int err, fast = 0, in, out; size_t count; dma_addr_t addr_in, addr_out; + struct scatterlist *in_sg, *out_sg; + int len32; pr_debug("total: %d\n", dd->total); @@ -514,6 +581,9 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) addr_in = sg_dma_address(dd->in_sg); addr_out = sg_dma_address(dd->out_sg); + in_sg = dd->in_sg; + out_sg = dd->out_sg; + dd->flags |= FLAGS_FAST; } else { @@ -521,6 +591,27 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) count = sg_copy(&dd->in_sg, &dd->in_offset, dd->buf_in, dd->buflen, dd->total, 0); + len32 = DIV_ROUND_UP(count, DMA_MIN) * DMA_MIN; + + /* + * The data going into the AES module has been copied + * to a local buffer and the data coming out will go + * into a local buffer so set up local SG entries for + * both. + */ + sg_init_table(&dd->in_sgl, 1); + dd->in_sgl.offset = dd->in_offset; + sg_dma_len(&dd->in_sgl) = len32; + sg_dma_address(&dd->in_sgl) = dd->dma_addr_in; + + sg_init_table(&dd->out_sgl, 1); + dd->out_sgl.offset = dd->out_offset; + sg_dma_len(&dd->out_sgl) = len32; + sg_dma_address(&dd->out_sgl) = dd->dma_addr_out; + + in_sg = &dd->in_sgl; + out_sg = &dd->out_sgl; + addr_in = dd->dma_addr_in; addr_out = dd->dma_addr_out; @@ -530,7 +621,7 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) dd->total -= count; - err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count); + err = omap_aes_crypt_dma(tfm, in_sg, out_sg); if (err) { dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); @@ -545,7 +636,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) pr_debug("err: %d\n", err); - clk_disable(dd->iclk); + pm_runtime_put_sync(dd->dev); dd->flags &= ~FLAGS_BUSY; req->base.complete(&req->base, err); @@ -558,10 +649,10 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) pr_debug("total: %d\n", dd->total); - omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START); + omap_aes_dma_stop(dd); - omap_stop_dma(dd->dma_lch_in); - omap_stop_dma(dd->dma_lch_out); + dmaengine_terminate_all(dd->dma_lch_in); + dmaengine_terminate_all(dd->dma_lch_out); if (dd->flags & FLAGS_FAST) { dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); @@ -734,6 +825,16 @@ static int omap_aes_cbc_decrypt(struct ablkcipher_request *req) return omap_aes_crypt(req, FLAGS_CBC); } +static int omap_aes_ctr_encrypt(struct ablkcipher_request *req) +{ + return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CTR); +} + +static int omap_aes_ctr_decrypt(struct ablkcipher_request *req) +{ + return omap_aes_crypt(req, FLAGS_CTR); +} + static int omap_aes_cra_init(struct crypto_tfm *tfm) { pr_debug("enter\n"); @@ -750,7 +851,7 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm) /* ********************** ALGS ************************************ */ -static struct crypto_alg algs[] = { +static struct crypto_alg algs_ecb_cbc[] = { { .cra_name = "ecb(aes)", .cra_driver_name = "ecb-aes-omap", @@ -798,11 +899,213 @@ static struct crypto_alg algs[] = { } }; +static struct crypto_alg algs_ctr[] = { +{ + .cra_name = "ctr(aes)", + .cra_driver_name = "ctr-aes-omap", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = omap_aes_cra_init, + .cra_exit = omap_aes_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .geniv = "eseqiv", + .ivsize = AES_BLOCK_SIZE, + .setkey = omap_aes_setkey, + .encrypt = omap_aes_ctr_encrypt, + .decrypt = omap_aes_ctr_decrypt, + } +} , +}; + +static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = { + { + .algs_list = algs_ecb_cbc, + .size = ARRAY_SIZE(algs_ecb_cbc), + }, +}; + +static const struct omap_aes_pdata omap_aes_pdata_omap2 = { + .algs_info = omap_aes_algs_info_ecb_cbc, + .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc), + .trigger = omap_aes_dma_trigger_omap2, + .key_ofs = 0x1c, + .iv_ofs = 0x20, + .ctrl_ofs = 0x30, + .data_ofs = 0x34, + .rev_ofs = 0x44, + .mask_ofs = 0x48, + .dma_enable_in = BIT(2), + .dma_enable_out = BIT(3), + .dma_start = BIT(5), + .major_mask = 0xf0, + .major_shift = 4, + .minor_mask = 0x0f, + .minor_shift = 0, +}; + +#ifdef CONFIG_OF +static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc_ctr[] = { + { + .algs_list = algs_ecb_cbc, + .size = ARRAY_SIZE(algs_ecb_cbc), + }, + { + .algs_list = algs_ctr, + .size = ARRAY_SIZE(algs_ctr), + }, +}; + +static const struct omap_aes_pdata omap_aes_pdata_omap3 = { + .algs_info = omap_aes_algs_info_ecb_cbc_ctr, + .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr), + .trigger = omap_aes_dma_trigger_omap2, + .key_ofs = 0x1c, + .iv_ofs = 0x20, + .ctrl_ofs = 0x30, + .data_ofs = 0x34, + .rev_ofs = 0x44, + .mask_ofs = 0x48, + .dma_enable_in = BIT(2), + .dma_enable_out = BIT(3), + .dma_start = BIT(5), + .major_mask = 0xf0, + .major_shift = 4, + .minor_mask = 0x0f, + .minor_shift = 0, +}; + +static const struct omap_aes_pdata omap_aes_pdata_omap4 = { + .algs_info = omap_aes_algs_info_ecb_cbc_ctr, + .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr), + .trigger = omap_aes_dma_trigger_omap4, + .key_ofs = 0x3c, + .iv_ofs = 0x40, + .ctrl_ofs = 0x50, + .data_ofs = 0x60, + .rev_ofs = 0x80, + .mask_ofs = 0x84, + .dma_enable_in = BIT(5), + .dma_enable_out = BIT(6), + .major_mask = 0x0700, + .major_shift = 8, + .minor_mask = 0x003f, + .minor_shift = 0, +}; + +static const struct of_device_id omap_aes_of_match[] = { + { + .compatible = "ti,omap2-aes", + .data = &omap_aes_pdata_omap2, + }, + { + .compatible = "ti,omap3-aes", + .data = &omap_aes_pdata_omap3, + }, + { + .compatible = "ti,omap4-aes", + .data = &omap_aes_pdata_omap4, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_aes_of_match); + +static int omap_aes_get_res_of(struct omap_aes_dev *dd, + struct device *dev, struct resource *res) +{ + struct device_node *node = dev->of_node; + const struct of_device_id *match; + int err = 0; + + match = of_match_device(of_match_ptr(omap_aes_of_match), dev); + if (!match) { + dev_err(dev, "no compatible OF match\n"); + err = -EINVAL; + goto err; + } + + err = of_address_to_resource(node, 0, res); + if (err < 0) { + dev_err(dev, "can't translate OF node address\n"); + err = -EINVAL; + goto err; + } + + dd->dma_out = -1; /* Dummy value that's unused */ + dd->dma_in = -1; /* Dummy value that's unused */ + + dd->pdata = match->data; + +err: + return err; +} +#else +static const struct of_device_id omap_aes_of_match[] = { + {}, +}; + +static int omap_aes_get_res_of(struct omap_aes_dev *dd, + struct device *dev, struct resource *res) +{ + return -EINVAL; +} +#endif + +static int omap_aes_get_res_pdev(struct omap_aes_dev *dd, + struct platform_device *pdev, struct resource *res) +{ + struct device *dev = &pdev->dev; + struct resource *r; + int err = 0; + + /* Get the base address */ + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto err; + } + memcpy(res, r, sizeof(*res)); + + /* Get the DMA out channel */ + r = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!r) { + dev_err(dev, "no DMA out resource info\n"); + err = -ENODEV; + goto err; + } + dd->dma_out = r->start; + + /* Get the DMA in channel */ + r = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (!r) { + dev_err(dev, "no DMA in resource info\n"); + err = -ENODEV; + goto err; + } + dd->dma_in = r->start; + + /* Only OMAP2/3 can be non-DT */ + dd->pdata = &omap_aes_pdata_omap2; + +err: + return err; +} + static int omap_aes_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct omap_aes_dev *dd; - struct resource *res; + struct crypto_alg *algp; + struct resource res; int err = -ENOMEM, i, j; u32 reg; @@ -817,49 +1120,31 @@ static int omap_aes_probe(struct platform_device *pdev) spin_lock_init(&dd->lock); crypto_init_queue(&dd->queue, OMAP_AES_QUEUE_LENGTH); - /* Get the base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "invalid resource type\n"); - err = -ENODEV; - goto err_res; - } - dd->phys_base = res->start; - - /* Get the DMA */ - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!res) - dev_info(dev, "no DMA info\n"); - else - dd->dma_out = res->start; - - /* Get the DMA */ - res = platform_get_resource(pdev, IORESOURCE_DMA, 1); - if (!res) - dev_info(dev, "no DMA info\n"); - else - dd->dma_in = res->start; - - /* Initializing the clock */ - dd->iclk = clk_get(dev, "ick"); - if (IS_ERR(dd->iclk)) { - dev_err(dev, "clock intialization failed.\n"); - err = PTR_ERR(dd->iclk); + err = (dev->of_node) ? omap_aes_get_res_of(dd, dev, &res) : + omap_aes_get_res_pdev(dd, pdev, &res); + if (err) goto err_res; - } - dd->io_base = ioremap(dd->phys_base, SZ_4K); + dd->io_base = devm_request_and_ioremap(dev, &res); if (!dd->io_base) { dev_err(dev, "can't ioremap\n"); err = -ENOMEM; - goto err_io; + goto err_res; } + dd->phys_base = res.start; + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + + omap_aes_dma_stop(dd); + + reg = omap_aes_read(dd, AES_REG_REV(dd)); + + pm_runtime_put_sync(dev); - clk_enable(dd->iclk); - reg = omap_aes_read(dd, AES_REG_REV); dev_info(dev, "OMAP AES hw accel rev: %u.%u\n", - (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR); - clk_disable(dd->iclk); + (reg & dd->pdata->major_mask) >> dd->pdata->major_shift, + (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift); tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd); tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd); @@ -873,26 +1158,32 @@ static int omap_aes_probe(struct platform_device *pdev) list_add_tail(&dd->list, &dev_list); spin_unlock(&list_lock); - for (i = 0; i < ARRAY_SIZE(algs); i++) { - pr_debug("i: %d\n", i); - err = crypto_register_alg(&algs[i]); - if (err) - goto err_algs; - } + for (i = 0; i < dd->pdata->algs_info_size; i++) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; + + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); + + err = crypto_register_alg(algp); + if (err) + goto err_algs; - pr_info("probe() done\n"); + dd->pdata->algs_info[i].registered++; + } + } return 0; err_algs: - for (j = 0; j < i; j++) - crypto_unregister_alg(&algs[j]); + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_alg( + &dd->pdata->algs_info[i].algs_list[j]); omap_aes_dma_cleanup(dd); err_dma: tasklet_kill(&dd->done_task); tasklet_kill(&dd->queue_task); - iounmap(dd->io_base); -err_io: - clk_put(dd->iclk); + pm_runtime_disable(dev); err_res: kfree(dd); dd = NULL; @@ -904,7 +1195,7 @@ err_data: static int omap_aes_remove(struct platform_device *pdev) { struct omap_aes_dev *dd = platform_get_drvdata(pdev); - int i; + int i, j; if (!dd) return -ENODEV; @@ -913,33 +1204,52 @@ static int omap_aes_remove(struct platform_device *pdev) list_del(&dd->list); spin_unlock(&list_lock); - for (i = 0; i < ARRAY_SIZE(algs); i++) - crypto_unregister_alg(&algs[i]); + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_alg( + &dd->pdata->algs_info[i].algs_list[j]); tasklet_kill(&dd->done_task); tasklet_kill(&dd->queue_task); omap_aes_dma_cleanup(dd); - iounmap(dd->io_base); - clk_put(dd->iclk); + pm_runtime_disable(dd->dev); kfree(dd); dd = NULL; return 0; } +#ifdef CONFIG_PM_SLEEP +static int omap_aes_suspend(struct device *dev) +{ + pm_runtime_put_sync(dev); + return 0; +} + +static int omap_aes_resume(struct device *dev) +{ + pm_runtime_get_sync(dev); + return 0; +} +#endif + +static const struct dev_pm_ops omap_aes_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_aes_suspend, omap_aes_resume) +}; + static struct platform_driver omap_aes_driver = { .probe = omap_aes_probe, .remove = omap_aes_remove, .driver = { .name = "omap-aes", .owner = THIS_MODULE, + .pm = &omap_aes_pm_ops, + .of_match_table = omap_aes_of_match, }, }; static int __init omap_aes_mod_init(void) { - pr_info("loading %s driver\n", "omap-aes"); - return platform_driver_register(&omap_aes_driver); } diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 9e6947b..3d1611f 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -5,6 +5,7 @@ * * Copyright (c) 2010 Nokia Corporation * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com> + * Copyright (c) 2011 Texas Instruments Incorporated * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as published @@ -22,12 +23,18 @@ #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/clk.h> #include <linux/irq.h> #include <linux/io.h> #include <linux/platform_device.h> #include <linux/scatterlist.h> #include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/omap-dma.h> +#include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> #include <linux/delay.h> #include <linux/crypto.h> #include <linux/cryptohash.h> @@ -37,19 +44,17 @@ #include <crypto/hash.h> #include <crypto/internal/hash.h> -#include <linux/omap-dma.h> - -#ifdef CONFIG_ARCH_OMAP1 -#include <mach/irqs.h> -#endif - -#define SHA_REG_DIGEST(x) (0x00 + ((x) * 0x04)) -#define SHA_REG_DIN(x) (0x1C + ((x) * 0x04)) - #define SHA1_MD5_BLOCK_SIZE SHA1_BLOCK_SIZE #define MD5_DIGEST_SIZE 16 -#define SHA_REG_DIGCNT 0x14 +#define DST_MAXBURST 16 +#define DMA_MIN (DST_MAXBURST * sizeof(u32)) + +#define SHA_REG_IDIGEST(dd, x) ((dd)->pdata->idigest_ofs + ((x)*0x04)) +#define SHA_REG_DIN(dd, x) ((dd)->pdata->din_ofs + ((x) * 0x04)) +#define SHA_REG_DIGCNT(dd) ((dd)->pdata->digcnt_ofs) + +#define SHA_REG_ODIGEST(x) (0x00 + ((x) * 0x04)) #define SHA_REG_CTRL 0x18 #define SHA_REG_CTRL_LENGTH (0xFFFFFFFF << 5) @@ -59,19 +64,42 @@ #define SHA_REG_CTRL_INPUT_READY (1 << 1) #define SHA_REG_CTRL_OUTPUT_READY (1 << 0) -#define SHA_REG_REV 0x5C -#define SHA_REG_REV_MAJOR 0xF0 -#define SHA_REG_REV_MINOR 0x0F +#define SHA_REG_REV(dd) ((dd)->pdata->rev_ofs) -#define SHA_REG_MASK 0x60 +#define SHA_REG_MASK(dd) ((dd)->pdata->mask_ofs) #define SHA_REG_MASK_DMA_EN (1 << 3) #define SHA_REG_MASK_IT_EN (1 << 2) #define SHA_REG_MASK_SOFTRESET (1 << 1) #define SHA_REG_AUTOIDLE (1 << 0) -#define SHA_REG_SYSSTATUS 0x64 +#define SHA_REG_SYSSTATUS(dd) ((dd)->pdata->sysstatus_ofs) #define SHA_REG_SYSSTATUS_RESETDONE (1 << 0) +#define SHA_REG_MODE 0x44 +#define SHA_REG_MODE_HMAC_OUTER_HASH (1 << 7) +#define SHA_REG_MODE_HMAC_KEY_PROC (1 << 5) +#define SHA_REG_MODE_CLOSE_HASH (1 << 4) +#define SHA_REG_MODE_ALGO_CONSTANT (1 << 3) +#define SHA_REG_MODE_ALGO_MASK (3 << 1) +#define SHA_REG_MODE_ALGO_MD5_128 (0 << 1) +#define SHA_REG_MODE_ALGO_SHA1_160 (1 << 1) +#define SHA_REG_MODE_ALGO_SHA2_224 (2 << 1) +#define SHA_REG_MODE_ALGO_SHA2_256 (3 << 1) + +#define SHA_REG_LENGTH 0x48 + +#define SHA_REG_IRQSTATUS 0x118 +#define SHA_REG_IRQSTATUS_CTX_RDY (1 << 3) +#define SHA_REG_IRQSTATUS_PARTHASH_RDY (1 << 2) +#define SHA_REG_IRQSTATUS_INPUT_RDY (1 << 1) +#define SHA_REG_IRQSTATUS_OUTPUT_RDY (1 << 0) + +#define SHA_REG_IRQENA 0x11C +#define SHA_REG_IRQENA_CTX_RDY (1 << 3) +#define SHA_REG_IRQENA_PARTHASH_RDY (1 << 2) +#define SHA_REG_IRQENA_INPUT_RDY (1 << 1) +#define SHA_REG_IRQENA_OUTPUT_RDY (1 << 0) + #define DEFAULT_TIMEOUT_INTERVAL HZ /* mostly device flags */ @@ -82,20 +110,33 @@ #define FLAGS_INIT 4 #define FLAGS_CPU 5 #define FLAGS_DMA_READY 6 +#define FLAGS_AUTO_XOR 7 +#define FLAGS_BE32_SHA1 8 /* context flags */ #define FLAGS_FINUP 16 #define FLAGS_SG 17 -#define FLAGS_SHA1 18 -#define FLAGS_HMAC 19 -#define FLAGS_ERROR 20 -#define OP_UPDATE 1 -#define OP_FINAL 2 +#define FLAGS_MODE_SHIFT 18 +#define FLAGS_MODE_MASK (SHA_REG_MODE_ALGO_MASK \ + << (FLAGS_MODE_SHIFT - 1)) +#define FLAGS_MODE_MD5 (SHA_REG_MODE_ALGO_MD5_128 \ + << (FLAGS_MODE_SHIFT - 1)) +#define FLAGS_MODE_SHA1 (SHA_REG_MODE_ALGO_SHA1_160 \ + << (FLAGS_MODE_SHIFT - 1)) +#define FLAGS_MODE_SHA224 (SHA_REG_MODE_ALGO_SHA2_224 \ + << (FLAGS_MODE_SHIFT - 1)) +#define FLAGS_MODE_SHA256 (SHA_REG_MODE_ALGO_SHA2_256 \ + << (FLAGS_MODE_SHIFT - 1)) +#define FLAGS_HMAC 20 +#define FLAGS_ERROR 21 + +#define OP_UPDATE 1 +#define OP_FINAL 2 #define OMAP_ALIGN_MASK (sizeof(u32)-1) #define OMAP_ALIGNED __attribute__((aligned(sizeof(u32)))) -#define BUFLEN PAGE_SIZE +#define BUFLEN PAGE_SIZE struct omap_sham_dev; @@ -104,7 +145,7 @@ struct omap_sham_reqctx { unsigned long flags; unsigned long op; - u8 digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED; + u8 digest[SHA256_DIGEST_SIZE] OMAP_ALIGNED; size_t digcnt; size_t bufcnt; size_t buflen; @@ -112,6 +153,7 @@ struct omap_sham_reqctx { /* walk state */ struct scatterlist *sg; + struct scatterlist sgl; unsigned int offset; /* offset in current sg */ unsigned int total; /* total request */ @@ -120,8 +162,8 @@ struct omap_sham_reqctx { struct omap_sham_hmac_ctx { struct crypto_shash *shash; - u8 ipad[SHA1_MD5_BLOCK_SIZE]; - u8 opad[SHA1_MD5_BLOCK_SIZE]; + u8 ipad[SHA1_MD5_BLOCK_SIZE] OMAP_ALIGNED; + u8 opad[SHA1_MD5_BLOCK_SIZE] OMAP_ALIGNED; }; struct omap_sham_ctx { @@ -137,22 +179,56 @@ struct omap_sham_ctx { #define OMAP_SHAM_QUEUE_LENGTH 1 +struct omap_sham_algs_info { + struct ahash_alg *algs_list; + unsigned int size; + unsigned int registered; +}; + +struct omap_sham_pdata { + struct omap_sham_algs_info *algs_info; + unsigned int algs_info_size; + unsigned long flags; + int digest_size; + + void (*copy_hash)(struct ahash_request *req, int out); + void (*write_ctrl)(struct omap_sham_dev *dd, size_t length, + int final, int dma); + void (*trigger)(struct omap_sham_dev *dd, size_t length); + int (*poll_irq)(struct omap_sham_dev *dd); + irqreturn_t (*intr_hdlr)(int irq, void *dev_id); + + u32 odigest_ofs; + u32 idigest_ofs; + u32 din_ofs; + u32 digcnt_ofs; + u32 rev_ofs; + u32 mask_ofs; + u32 sysstatus_ofs; + + u32 major_mask; + u32 major_shift; + u32 minor_mask; + u32 minor_shift; +}; + struct omap_sham_dev { struct list_head list; unsigned long phys_base; struct device *dev; void __iomem *io_base; int irq; - struct clk *iclk; spinlock_t lock; int err; - int dma; - int dma_lch; + unsigned int dma; + struct dma_chan *dma_lch; struct tasklet_struct done_task; unsigned long flags; struct crypto_queue queue; struct ahash_request *req; + + const struct omap_sham_pdata *pdata; }; struct omap_sham_drv { @@ -200,21 +276,44 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit) return 0; } -static void omap_sham_copy_hash(struct ahash_request *req, int out) +static void omap_sham_copy_hash_omap2(struct ahash_request *req, int out) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; u32 *hash = (u32 *)ctx->digest; int i; - /* MD5 is almost unused. So copy sha1 size to reduce code */ - for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) { + for (i = 0; i < dd->pdata->digest_size / sizeof(u32); i++) { if (out) - hash[i] = omap_sham_read(ctx->dd, - SHA_REG_DIGEST(i)); + hash[i] = omap_sham_read(dd, SHA_REG_IDIGEST(dd, i)); else - omap_sham_write(ctx->dd, - SHA_REG_DIGEST(i), hash[i]); + omap_sham_write(dd, SHA_REG_IDIGEST(dd, i), hash[i]); + } +} + +static void omap_sham_copy_hash_omap4(struct ahash_request *req, int out) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; + int i; + + if (ctx->flags & BIT(FLAGS_HMAC)) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(dd->req); + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + u32 *opad = (u32 *)bctx->opad; + + for (i = 0; i < dd->pdata->digest_size / sizeof(u32); i++) { + if (out) + opad[i] = omap_sham_read(dd, + SHA_REG_ODIGEST(i)); + else + omap_sham_write(dd, SHA_REG_ODIGEST(i), + opad[i]); + } } + + omap_sham_copy_hash_omap2(req, out); } static void omap_sham_copy_ready_hash(struct ahash_request *req) @@ -222,34 +321,44 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req) struct omap_sham_reqctx *ctx = ahash_request_ctx(req); u32 *in = (u32 *)ctx->digest; u32 *hash = (u32 *)req->result; - int i; + int i, d, big_endian = 0; if (!hash) return; - if (likely(ctx->flags & BIT(FLAGS_SHA1))) { - /* SHA1 results are in big endian */ - for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) + switch (ctx->flags & FLAGS_MODE_MASK) { + case FLAGS_MODE_MD5: + d = MD5_DIGEST_SIZE / sizeof(u32); + break; + case FLAGS_MODE_SHA1: + /* OMAP2 SHA1 is big endian */ + if (test_bit(FLAGS_BE32_SHA1, &ctx->dd->flags)) + big_endian = 1; + d = SHA1_DIGEST_SIZE / sizeof(u32); + break; + case FLAGS_MODE_SHA224: + d = SHA224_DIGEST_SIZE / sizeof(u32); + break; + case FLAGS_MODE_SHA256: + d = SHA256_DIGEST_SIZE / sizeof(u32); + break; + default: + d = 0; + } + + if (big_endian) + for (i = 0; i < d; i++) hash[i] = be32_to_cpu(in[i]); - } else { - /* MD5 results are in little endian */ - for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++) + else + for (i = 0; i < d; i++) hash[i] = le32_to_cpu(in[i]); - } } static int omap_sham_hw_init(struct omap_sham_dev *dd) { - clk_enable(dd->iclk); + pm_runtime_get_sync(dd->dev); if (!test_bit(FLAGS_INIT, &dd->flags)) { - omap_sham_write_mask(dd, SHA_REG_MASK, - SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); - - if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, - SHA_REG_SYSSTATUS_RESETDONE)) - return -ETIMEDOUT; - set_bit(FLAGS_INIT, &dd->flags); dd->err = 0; } @@ -257,23 +366,23 @@ static int omap_sham_hw_init(struct omap_sham_dev *dd) return 0; } -static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, +static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length, int final, int dma) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); u32 val = length << 5, mask; if (likely(ctx->digcnt)) - omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); + omap_sham_write(dd, SHA_REG_DIGCNT(dd), ctx->digcnt); - omap_sham_write_mask(dd, SHA_REG_MASK, + omap_sham_write_mask(dd, SHA_REG_MASK(dd), SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0), SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN); /* * Setting ALGO_CONST only for the first iteration * and CLOSE_HASH only for the last one. */ - if (ctx->flags & BIT(FLAGS_SHA1)) + if ((ctx->flags & FLAGS_MODE_MASK) == FLAGS_MODE_SHA1) val |= SHA_REG_CTRL_ALGO; if (!ctx->digcnt) val |= SHA_REG_CTRL_ALGO_CONST; @@ -286,6 +395,81 @@ static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask); } +static void omap_sham_trigger_omap2(struct omap_sham_dev *dd, size_t length) +{ +} + +static int omap_sham_poll_irq_omap2(struct omap_sham_dev *dd) +{ + return omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY); +} + +static void omap_sham_write_n(struct omap_sham_dev *dd, u32 offset, + u32 *value, int count) +{ + for (; count--; value++, offset += 4) + omap_sham_write(dd, offset, *value); +} + +static void omap_sham_write_ctrl_omap4(struct omap_sham_dev *dd, size_t length, + int final, int dma) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + u32 val, mask; + + /* + * Setting ALGO_CONST only for the first iteration and + * CLOSE_HASH only for the last one. Note that flags mode bits + * correspond to algorithm encoding in mode register. + */ + val = (ctx->flags & FLAGS_MODE_MASK) >> (FLAGS_MODE_SHIFT - 1); + if (!ctx->digcnt) { + struct crypto_ahash *tfm = crypto_ahash_reqtfm(dd->req); + struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct omap_sham_hmac_ctx *bctx = tctx->base; + + val |= SHA_REG_MODE_ALGO_CONSTANT; + + if (ctx->flags & BIT(FLAGS_HMAC)) { + val |= SHA_REG_MODE_HMAC_KEY_PROC; + omap_sham_write_n(dd, SHA_REG_ODIGEST(0), + (u32 *)bctx->ipad, + SHA1_BLOCK_SIZE / sizeof(u32)); + ctx->digcnt += SHA1_BLOCK_SIZE; + } + } + + if (final) { + val |= SHA_REG_MODE_CLOSE_HASH; + + if (ctx->flags & BIT(FLAGS_HMAC)) + val |= SHA_REG_MODE_HMAC_OUTER_HASH; + } + + mask = SHA_REG_MODE_ALGO_CONSTANT | SHA_REG_MODE_CLOSE_HASH | + SHA_REG_MODE_ALGO_MASK | SHA_REG_MODE_HMAC_OUTER_HASH | + SHA_REG_MODE_HMAC_KEY_PROC; + + dev_dbg(dd->dev, "ctrl: %08x, flags: %08lx\n", val, ctx->flags); + omap_sham_write_mask(dd, SHA_REG_MODE, val, mask); + omap_sham_write(dd, SHA_REG_IRQENA, SHA_REG_IRQENA_OUTPUT_RDY); + omap_sham_write_mask(dd, SHA_REG_MASK(dd), + SHA_REG_MASK_IT_EN | + (dma ? SHA_REG_MASK_DMA_EN : 0), + SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN); +} + +static void omap_sham_trigger_omap4(struct omap_sham_dev *dd, size_t length) +{ + omap_sham_write(dd, SHA_REG_LENGTH, length); +} + +static int omap_sham_poll_irq_omap4(struct omap_sham_dev *dd) +{ + return omap_sham_wait(dd, SHA_REG_IRQSTATUS, + SHA_REG_IRQSTATUS_INPUT_RDY); +} + static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, size_t length, int final) { @@ -296,12 +480,13 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - omap_sham_write_ctrl(dd, length, final, 0); + dd->pdata->write_ctrl(dd, length, final, 0); + dd->pdata->trigger(dd, length); /* should be non-zero before next lines to disable clocks later */ ctx->digcnt += length; - if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY)) + if (dd->pdata->poll_irq(dd)) return -ETIMEDOUT; if (final) @@ -312,30 +497,73 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, len32 = DIV_ROUND_UP(length, sizeof(u32)); for (count = 0; count < len32; count++) - omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]); + omap_sham_write(dd, SHA_REG_DIN(dd, count), buffer[count]); return -EINPROGRESS; } +static void omap_sham_dma_callback(void *param) +{ + struct omap_sham_dev *dd = param; + + set_bit(FLAGS_DMA_READY, &dd->flags); + tasklet_schedule(&dd->done_task); +} + static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, - size_t length, int final) + size_t length, int final, int is_sg) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - int len32; + struct dma_async_tx_descriptor *tx; + struct dma_slave_config cfg; + int len32, ret; dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - len32 = DIV_ROUND_UP(length, sizeof(u32)); + memset(&cfg, 0, sizeof(cfg)); + + cfg.dst_addr = dd->phys_base + SHA_REG_DIN(dd, 0); + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.dst_maxburst = DST_MAXBURST; + + ret = dmaengine_slave_config(dd->dma_lch, &cfg); + if (ret) { + pr_err("omap-sham: can't configure dmaengine slave: %d\n", ret); + return ret; + } + + len32 = DIV_ROUND_UP(length, DMA_MIN) * DMA_MIN; + + if (is_sg) { + /* + * The SG entry passed in may not have the 'length' member + * set correctly so use a local SG entry (sgl) with the + * proper value for 'length' instead. If this is not done, + * the dmaengine may try to DMA the incorrect amount of data. + */ + sg_init_table(&ctx->sgl, 1); + ctx->sgl.page_link = ctx->sg->page_link; + ctx->sgl.offset = ctx->sg->offset; + sg_dma_len(&ctx->sgl) = len32; + sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg); + + tx = dmaengine_prep_slave_sg(dd->dma_lch, &ctx->sgl, 1, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } else { + tx = dmaengine_prep_slave_single(dd->dma_lch, dma_addr, len32, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + } - omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32, - 1, OMAP_DMA_SYNC_PACKET, dd->dma, - OMAP_DMA_DST_SYNC_PREFETCH); + if (!tx) { + dev_err(dd->dev, "prep_slave_sg/single() failed\n"); + return -EINVAL; + } - omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, - dma_addr, 0, 0); + tx->callback = omap_sham_dma_callback; + tx->callback_param = dd; - omap_sham_write_ctrl(dd, length, final, 1); + dd->pdata->write_ctrl(dd, length, final, 1); ctx->digcnt += length; @@ -344,7 +572,10 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, set_bit(FLAGS_DMA_ACTIVE, &dd->flags); - omap_start_dma(dd->dma_lch); + dmaengine_submit(tx); + dma_async_issue_pending(dd->dma_lch); + + dd->pdata->trigger(dd, length); return -EINPROGRESS; } @@ -391,6 +622,8 @@ static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd, struct omap_sham_reqctx *ctx, size_t length, int final) { + int ret; + ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, DMA_TO_DEVICE); if (dma_mapping_error(dd->dev, ctx->dma_addr)) { @@ -400,8 +633,12 @@ static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd, ctx->flags &= ~BIT(FLAGS_SG); - /* next call does not fail... so no unmap in the case of error */ - return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final); + ret = omap_sham_xmit_dma(dd, ctx->dma_addr, length, final, 0); + if (ret != -EINPROGRESS) + dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, + DMA_TO_DEVICE); + + return ret; } static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) @@ -436,6 +673,7 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd) struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); unsigned int length, final, tail; struct scatterlist *sg; + int ret; if (!ctx->total) return 0; @@ -443,6 +681,15 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd) if (ctx->bufcnt || ctx->offset) return omap_sham_update_dma_slow(dd); + /* + * Don't use the sg interface when the transfer size is less + * than the number of elements in a DMA frame. Otherwise, + * the dmaengine infrastructure will calculate that it needs + * to transfer 0 frames which ultimately fails. + */ + if (ctx->total < (DST_MAXBURST * sizeof(u32))) + return omap_sham_update_dma_slow(dd); + dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n", ctx->digcnt, ctx->bufcnt, ctx->total); @@ -480,8 +727,11 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd) final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total; - /* next call does not fail... so no unmap in the case of error */ - return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final); + ret = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final, 1); + if (ret != -EINPROGRESS) + dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + + return ret; } static int omap_sham_update_cpu(struct omap_sham_dev *dd) @@ -500,7 +750,8 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - omap_stop_dma(dd->dma_lch); + dmaengine_terminate_all(dd->dma_lch); + if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); if (ctx->sg->length == ctx->offset) { @@ -542,18 +793,33 @@ static int omap_sham_init(struct ahash_request *req) dev_dbg(dd->dev, "init: digest size: %d\n", crypto_ahash_digestsize(tfm)); - if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE) - ctx->flags |= BIT(FLAGS_SHA1); + switch (crypto_ahash_digestsize(tfm)) { + case MD5_DIGEST_SIZE: + ctx->flags |= FLAGS_MODE_MD5; + break; + case SHA1_DIGEST_SIZE: + ctx->flags |= FLAGS_MODE_SHA1; + break; + case SHA224_DIGEST_SIZE: + ctx->flags |= FLAGS_MODE_SHA224; + break; + case SHA256_DIGEST_SIZE: + ctx->flags |= FLAGS_MODE_SHA256; + break; + } ctx->bufcnt = 0; ctx->digcnt = 0; ctx->buflen = BUFLEN; if (tctx->flags & BIT(FLAGS_HMAC)) { - struct omap_sham_hmac_ctx *bctx = tctx->base; + if (!test_bit(FLAGS_AUTO_XOR, &dd->flags)) { + struct omap_sham_hmac_ctx *bctx = tctx->base; + + memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE); + ctx->bufcnt = SHA1_MD5_BLOCK_SIZE; + } - memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE); - ctx->bufcnt = SHA1_MD5_BLOCK_SIZE; ctx->flags |= BIT(FLAGS_HMAC); } @@ -587,7 +853,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) struct omap_sham_reqctx *ctx = ahash_request_ctx(req); int err = 0, use_dma = 1; - if (ctx->bufcnt <= 64) + if (ctx->bufcnt <= DMA_MIN) /* faster to handle last block with cpu */ use_dma = 0; @@ -630,7 +896,8 @@ static int omap_sham_finish(struct ahash_request *req) if (ctx->digcnt) { omap_sham_copy_ready_hash(req); - if (ctx->flags & BIT(FLAGS_HMAC)) + if ((ctx->flags & BIT(FLAGS_HMAC)) && + !test_bit(FLAGS_AUTO_XOR, &dd->flags)) err = omap_sham_finish_hmac(req); } @@ -645,7 +912,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) struct omap_sham_dev *dd = ctx->dd; if (!err) { - omap_sham_copy_hash(req, 1); + dd->pdata->copy_hash(req, 1); if (test_bit(FLAGS_FINAL, &dd->flags)) err = omap_sham_finish(req); } else { @@ -655,7 +922,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) /* atomic operation is not needed here */ dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - clk_disable(dd->iclk); + + pm_runtime_put_sync(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -702,19 +970,9 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd, if (err) goto err1; - omap_set_dma_dest_params(dd->dma_lch, 0, - OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + SHA_REG_DIN(0), 0, 16); - - omap_set_dma_dest_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_16); - - omap_set_dma_src_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_4); - if (ctx->digcnt) /* request has changed - restore hash */ - omap_sham_copy_hash(req, 0); + dd->pdata->copy_hash(req, 0); if (ctx->op == OP_UPDATE) { err = omap_sham_update_req(dd); @@ -853,7 +1111,21 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, struct omap_sham_hmac_ctx *bctx = tctx->base; int bs = crypto_shash_blocksize(bctx->shash); int ds = crypto_shash_digestsize(bctx->shash); + struct omap_sham_dev *dd = NULL, *tmp; int err, i; + + spin_lock_bh(&sham.lock); + if (!tctx->dd) { + list_for_each_entry(tmp, &sham.dev_list, list) { + dd = tmp; + break; + } + tctx->dd = dd; + } else { + dd = tctx->dd; + } + spin_unlock_bh(&sham.lock); + err = crypto_shash_setkey(tctx->fallback, key, keylen); if (err) return err; @@ -870,11 +1142,14 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, } memset(bctx->ipad + keylen, 0, bs - keylen); - memcpy(bctx->opad, bctx->ipad, bs); - for (i = 0; i < bs; i++) { - bctx->ipad[i] ^= 0x36; - bctx->opad[i] ^= 0x5c; + if (!test_bit(FLAGS_AUTO_XOR, &dd->flags)) { + memcpy(bctx->opad, bctx->ipad, bs); + + for (i = 0; i < bs; i++) { + bctx->ipad[i] ^= 0x36; + bctx->opad[i] ^= 0x5c; + } } return err; @@ -924,6 +1199,16 @@ static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm) return omap_sham_cra_init_alg(tfm, "sha1"); } +static int omap_sham_cra_sha224_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "sha224"); +} + +static int omap_sham_cra_sha256_init(struct crypto_tfm *tfm) +{ + return omap_sham_cra_init_alg(tfm, "sha256"); +} + static int omap_sham_cra_md5_init(struct crypto_tfm *tfm) { return omap_sham_cra_init_alg(tfm, "md5"); @@ -942,7 +1227,7 @@ static void omap_sham_cra_exit(struct crypto_tfm *tfm) } } -static struct ahash_alg algs[] = { +static struct ahash_alg algs_sha1_md5[] = { { .init = omap_sham_init, .update = omap_sham_update, @@ -1041,6 +1326,102 @@ static struct ahash_alg algs[] = { } }; +/* OMAP4 has some algs in addition to what OMAP2 has */ +static struct ahash_alg algs_sha224_sha256[] = { +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "omap-sha224", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "omap-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "omap-hmac-sha224", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = OMAP_ALIGN_MASK, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_sha224_init, + .cra_exit = omap_sham_cra_exit, + } +}, +{ + .init = omap_sham_init, + .update = omap_sham_update, + .final = omap_sham_final, + .finup = omap_sham_finup, + .digest = omap_sham_digest, + .setkey = omap_sham_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "omap-hmac-sha256", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_sham_ctx) + + sizeof(struct omap_sham_hmac_ctx), + .cra_alignmask = OMAP_ALIGN_MASK, + .cra_module = THIS_MODULE, + .cra_init = omap_sham_cra_sha256_init, + .cra_exit = omap_sham_cra_exit, + } +}, +}; + static void omap_sham_done_task(unsigned long data) { struct omap_sham_dev *dd = (struct omap_sham_dev *)data; @@ -1079,7 +1460,19 @@ finish: omap_sham_finish_req(dd->req, err); } -static irqreturn_t omap_sham_irq(int irq, void *dev_id) +static irqreturn_t omap_sham_irq_common(struct omap_sham_dev *dd) +{ + if (!test_bit(FLAGS_BUSY, &dd->flags)) { + dev_warn(dd->dev, "Interrupt when no active requests.\n"); + } else { + set_bit(FLAGS_OUTPUT_READY, &dd->flags); + tasklet_schedule(&dd->done_task); + } + + return IRQ_HANDLED; +} + +static irqreturn_t omap_sham_irq_omap2(int irq, void *dev_id) { struct omap_sham_dev *dd = dev_id; @@ -1091,61 +1484,188 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id) SHA_REG_CTRL_OUTPUT_READY); omap_sham_read(dd, SHA_REG_CTRL); - if (!test_bit(FLAGS_BUSY, &dd->flags)) { - dev_warn(dd->dev, "Interrupt when no active requests.\n"); - return IRQ_HANDLED; - } + return omap_sham_irq_common(dd); +} - set_bit(FLAGS_OUTPUT_READY, &dd->flags); - tasklet_schedule(&dd->done_task); +static irqreturn_t omap_sham_irq_omap4(int irq, void *dev_id) +{ + struct omap_sham_dev *dd = dev_id; - return IRQ_HANDLED; + omap_sham_write_mask(dd, SHA_REG_MASK(dd), 0, SHA_REG_MASK_IT_EN); + + return omap_sham_irq_common(dd); } -static void omap_sham_dma_callback(int lch, u16 ch_status, void *data) +static struct omap_sham_algs_info omap_sham_algs_info_omap2[] = { + { + .algs_list = algs_sha1_md5, + .size = ARRAY_SIZE(algs_sha1_md5), + }, +}; + +static const struct omap_sham_pdata omap_sham_pdata_omap2 = { + .algs_info = omap_sham_algs_info_omap2, + .algs_info_size = ARRAY_SIZE(omap_sham_algs_info_omap2), + .flags = BIT(FLAGS_BE32_SHA1), + .digest_size = SHA1_DIGEST_SIZE, + .copy_hash = omap_sham_copy_hash_omap2, + .write_ctrl = omap_sham_write_ctrl_omap2, + .trigger = omap_sham_trigger_omap2, + .poll_irq = omap_sham_poll_irq_omap2, + .intr_hdlr = omap_sham_irq_omap2, + .idigest_ofs = 0x00, + .din_ofs = 0x1c, + .digcnt_ofs = 0x14, + .rev_ofs = 0x5c, + .mask_ofs = 0x60, + .sysstatus_ofs = 0x64, + .major_mask = 0xf0, + .major_shift = 4, + .minor_mask = 0x0f, + .minor_shift = 0, +}; + +#ifdef CONFIG_OF +static struct omap_sham_algs_info omap_sham_algs_info_omap4[] = { + { + .algs_list = algs_sha1_md5, + .size = ARRAY_SIZE(algs_sha1_md5), + }, + { + .algs_list = algs_sha224_sha256, + .size = ARRAY_SIZE(algs_sha224_sha256), + }, +}; + +static const struct omap_sham_pdata omap_sham_pdata_omap4 = { + .algs_info = omap_sham_algs_info_omap4, + .algs_info_size = ARRAY_SIZE(omap_sham_algs_info_omap4), + .flags = BIT(FLAGS_AUTO_XOR), + .digest_size = SHA256_DIGEST_SIZE, + .copy_hash = omap_sham_copy_hash_omap4, + .write_ctrl = omap_sham_write_ctrl_omap4, + .trigger = omap_sham_trigger_omap4, + .poll_irq = omap_sham_poll_irq_omap4, + .intr_hdlr = omap_sham_irq_omap4, + .idigest_ofs = 0x020, + .din_ofs = 0x080, + .digcnt_ofs = 0x040, + .rev_ofs = 0x100, + .mask_ofs = 0x110, + .sysstatus_ofs = 0x114, + .major_mask = 0x0700, + .major_shift = 8, + .minor_mask = 0x003f, + .minor_shift = 0, +}; + +static const struct of_device_id omap_sham_of_match[] = { + { + .compatible = "ti,omap2-sham", + .data = &omap_sham_pdata_omap2, + }, + { + .compatible = "ti,omap4-sham", + .data = &omap_sham_pdata_omap4, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_sham_of_match); + +static int omap_sham_get_res_of(struct omap_sham_dev *dd, + struct device *dev, struct resource *res) { - struct omap_sham_dev *dd = data; + struct device_node *node = dev->of_node; + const struct of_device_id *match; + int err = 0; - if (ch_status != OMAP_DMA_BLOCK_IRQ) { - pr_err("omap-sham DMA error status: 0x%hx\n", ch_status); - dd->err = -EIO; - clear_bit(FLAGS_INIT, &dd->flags);/* request to re-initialize */ + match = of_match_device(of_match_ptr(omap_sham_of_match), dev); + if (!match) { + dev_err(dev, "no compatible OF match\n"); + err = -EINVAL; + goto err; } - set_bit(FLAGS_DMA_READY, &dd->flags); - tasklet_schedule(&dd->done_task); + err = of_address_to_resource(node, 0, res); + if (err < 0) { + dev_err(dev, "can't translate OF node address\n"); + err = -EINVAL; + goto err; + } + + dd->irq = of_irq_to_resource(node, 0, NULL); + if (!dd->irq) { + dev_err(dev, "can't translate OF irq value\n"); + err = -EINVAL; + goto err; + } + + dd->dma = -1; /* Dummy value that's unused */ + dd->pdata = match->data; + +err: + return err; } +#else +static const struct of_device_id omap_sham_of_match[] = { + {}, +}; -static int omap_sham_dma_init(struct omap_sham_dev *dd) +static int omap_sham_get_res_of(struct omap_sham_dev *dd, + struct device *dev, struct resource *res) { - int err; + return -EINVAL; +} +#endif - dd->dma_lch = -1; +static int omap_sham_get_res_pdev(struct omap_sham_dev *dd, + struct platform_device *pdev, struct resource *res) +{ + struct device *dev = &pdev->dev; + struct resource *r; + int err = 0; - err = omap_request_dma(dd->dma, dev_name(dd->dev), - omap_sham_dma_callback, dd, &dd->dma_lch); - if (err) { - dev_err(dd->dev, "Unable to request DMA channel\n"); - return err; + /* Get the base address */ + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!r) { + dev_err(dev, "no MEM resource info\n"); + err = -ENODEV; + goto err; } + memcpy(res, r, sizeof(*res)); - return 0; -} + /* Get the IRQ */ + dd->irq = platform_get_irq(pdev, 0); + if (dd->irq < 0) { + dev_err(dev, "no IRQ resource info\n"); + err = dd->irq; + goto err; + } -static void omap_sham_dma_cleanup(struct omap_sham_dev *dd) -{ - if (dd->dma_lch >= 0) { - omap_free_dma(dd->dma_lch); - dd->dma_lch = -1; + /* Get the DMA */ + r = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!r) { + dev_err(dev, "no DMA resource info\n"); + err = -ENODEV; + goto err; } + dd->dma = r->start; + + /* Only OMAP2/3 can be non-DT */ + dd->pdata = &omap_sham_pdata_omap2; + +err: + return err; } static int omap_sham_probe(struct platform_device *pdev) { struct omap_sham_dev *dd; struct device *dev = &pdev->dev; - struct resource *res; + struct resource res; + dma_cap_mask_t mask; int err, i, j; + u32 rev; dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL); if (dd == NULL) { @@ -1161,89 +1681,75 @@ static int omap_sham_probe(struct platform_device *pdev) tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd); crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH); - dd->irq = -1; - - /* Get the base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "no MEM resource info\n"); - err = -ENODEV; - goto res_err; - } - dd->phys_base = res->start; - - /* Get the DMA */ - res = platform_get_resource(pdev, IORESOURCE_DMA, 0); - if (!res) { - dev_err(dev, "no DMA resource info\n"); - err = -ENODEV; + err = (dev->of_node) ? omap_sham_get_res_of(dd, dev, &res) : + omap_sham_get_res_pdev(dd, pdev, &res); + if (err) goto res_err; - } - dd->dma = res->start; - /* Get the IRQ */ - dd->irq = platform_get_irq(pdev, 0); - if (dd->irq < 0) { - dev_err(dev, "no IRQ resource info\n"); - err = dd->irq; + dd->io_base = devm_request_and_ioremap(dev, &res); + if (!dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; goto res_err; } + dd->phys_base = res.start; - err = request_irq(dd->irq, omap_sham_irq, - IRQF_TRIGGER_LOW, dev_name(dev), dd); + err = request_irq(dd->irq, dd->pdata->intr_hdlr, IRQF_TRIGGER_LOW, + dev_name(dev), dd); if (err) { dev_err(dev, "unable to request irq.\n"); goto res_err; } - err = omap_sham_dma_init(dd); - if (err) - goto dma_err; + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); - /* Initializing the clock */ - dd->iclk = clk_get(dev, "ick"); - if (IS_ERR(dd->iclk)) { - dev_err(dev, "clock intialization failed.\n"); - err = PTR_ERR(dd->iclk); - goto clk_err; + dd->dma_lch = dma_request_slave_channel_compat(mask, omap_dma_filter_fn, + &dd->dma, dev, "rx"); + if (!dd->dma_lch) { + dev_err(dev, "unable to obtain RX DMA engine channel %u\n", + dd->dma); + err = -ENXIO; + goto dma_err; } - dd->io_base = ioremap(dd->phys_base, SZ_4K); - if (!dd->io_base) { - dev_err(dev, "can't ioremap\n"); - err = -ENOMEM; - goto io_err; - } + dd->flags |= dd->pdata->flags; + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + rev = omap_sham_read(dd, SHA_REG_REV(dd)); + pm_runtime_put_sync(&pdev->dev); - clk_enable(dd->iclk); dev_info(dev, "hw accel on OMAP rev %u.%u\n", - (omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4, - omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR); - clk_disable(dd->iclk); + (rev & dd->pdata->major_mask) >> dd->pdata->major_shift, + (rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift); spin_lock(&sham.lock); list_add_tail(&dd->list, &sham.dev_list); spin_unlock(&sham.lock); - for (i = 0; i < ARRAY_SIZE(algs); i++) { - err = crypto_register_ahash(&algs[i]); - if (err) - goto err_algs; + for (i = 0; i < dd->pdata->algs_info_size; i++) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + err = crypto_register_ahash( + &dd->pdata->algs_info[i].algs_list[j]); + if (err) + goto err_algs; + + dd->pdata->algs_info[i].registered++; + } } return 0; err_algs: - for (j = 0; j < i; j++) - crypto_unregister_ahash(&algs[j]); - iounmap(dd->io_base); -io_err: - clk_put(dd->iclk); -clk_err: - omap_sham_dma_cleanup(dd); + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_ahash( + &dd->pdata->algs_info[i].algs_list[j]); + pm_runtime_disable(dev); + dma_release_channel(dd->dma_lch); dma_err: - if (dd->irq >= 0) - free_irq(dd->irq, dd); + free_irq(dd->irq, dd); res_err: kfree(dd); dd = NULL; @@ -1256,7 +1762,7 @@ data_err: static int omap_sham_remove(struct platform_device *pdev) { static struct omap_sham_dev *dd; - int i; + int i, j; dd = platform_get_drvdata(pdev); if (!dd) @@ -1264,33 +1770,51 @@ static int omap_sham_remove(struct platform_device *pdev) spin_lock(&sham.lock); list_del(&dd->list); spin_unlock(&sham.lock); - for (i = 0; i < ARRAY_SIZE(algs); i++) - crypto_unregister_ahash(&algs[i]); + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_ahash( + &dd->pdata->algs_info[i].algs_list[j]); tasklet_kill(&dd->done_task); - iounmap(dd->io_base); - clk_put(dd->iclk); - omap_sham_dma_cleanup(dd); - if (dd->irq >= 0) - free_irq(dd->irq, dd); + pm_runtime_disable(&pdev->dev); + dma_release_channel(dd->dma_lch); + free_irq(dd->irq, dd); kfree(dd); dd = NULL; return 0; } +#ifdef CONFIG_PM_SLEEP +static int omap_sham_suspend(struct device *dev) +{ + pm_runtime_put_sync(dev); + return 0; +} + +static int omap_sham_resume(struct device *dev) +{ + pm_runtime_get_sync(dev); + return 0; +} +#endif + +static const struct dev_pm_ops omap_sham_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_sham_suspend, omap_sham_resume) +}; + static struct platform_driver omap_sham_driver = { .probe = omap_sham_probe, .remove = omap_sham_remove, .driver = { .name = "omap-sham", .owner = THIS_MODULE, + .pm = &omap_sham_pm_ops, + .of_match_table = omap_sham_of_match, }, }; static int __init omap_sham_mod_init(void) { - pr_info("loading %s driver\n", "omap-sham"); - return platform_driver_register(&omap_sham_driver); } diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 49ad8cb..4b31432 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -580,7 +580,7 @@ static int s5p_aes_probe(struct platform_device *pdev) resource_size(res), pdev->name)) return -EBUSY; - pdata->clk = clk_get(dev, "secss"); + pdata->clk = devm_clk_get(dev, "secss"); if (IS_ERR(pdata->clk)) { dev_err(dev, "failed to find secss clock source\n"); return -ENOENT; @@ -645,7 +645,6 @@ static int s5p_aes_probe(struct platform_device *pdev) err_irq: clk_disable(pdata->clk); - clk_put(pdata->clk); s5p_dev = NULL; platform_set_drvdata(pdev, NULL); @@ -667,7 +666,6 @@ static int s5p_aes_remove(struct platform_device *pdev) tasklet_kill(&pdata->tasklet); clk_disable(pdata->clk); - clk_put(pdata->clk); s5p_dev = NULL; platform_set_drvdata(pdev, NULL); |