116 files changed, 12079 insertions, 4348 deletions
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
index efc8d90..5b05510 100644
--- a/Documentation/DocBook/crypto-API.tmpl
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -1671,7 +1671,7 @@ read(opfd, out, outlen);
   <chapter id="API"><title>Programming Interface</title>
    <sect1><title>Block Cipher Context Data Structures</title>
 !Pinclude/linux/crypto.h Block Cipher Context Data Structures
-!Finclude/linux/crypto.h aead_request
+!Finclude/crypto/aead.h aead_request
    </sect1>
    <sect1><title>Block Cipher Algorithm Definitions</title>
 !Pinclude/linux/crypto.h Block Cipher Algorithm Definitions
@@ -1680,7 +1680,7 @@ read(opfd, out, outlen);
 !Finclude/linux/crypto.h aead_alg
 !Finclude/linux/crypto.h blkcipher_alg
 !Finclude/linux/crypto.h cipher_alg
-!Finclude/linux/crypto.h rng_alg
+!Finclude/crypto/rng.h rng_alg
    </sect1>
    <sect1><title>Asynchronous Block Cipher API</title>
 !Pinclude/linux/crypto.h Asynchronous Block Cipher API
@@ -1704,26 +1704,26 @@ read(opfd, out, outlen);
 !Finclude/linux/crypto.h ablkcipher_request_set_crypt
    </sect1>
    <sect1><title>Authenticated Encryption With Associated Data (AEAD) Cipher API</title>
-!Pinclude/linux/crypto.h Authenticated Encryption With Associated Data (AEAD) Cipher API
-!Finclude/linux/crypto.h crypto_alloc_aead
-!Finclude/linux/crypto.h crypto_free_aead
-!Finclude/linux/crypto.h crypto_aead_ivsize
-!Finclude/linux/crypto.h crypto_aead_authsize
-!Finclude/linux/crypto.h crypto_aead_blocksize
-!Finclude/linux/crypto.h crypto_aead_setkey
-!Finclude/linux/crypto.h crypto_aead_setauthsize
-!Finclude/linux/crypto.h crypto_aead_encrypt
-!Finclude/linux/crypto.h crypto_aead_decrypt
+!Pinclude/crypto/aead.h Authenticated Encryption With Associated Data (AEAD) Cipher API
+!Finclude/crypto/aead.h crypto_alloc_aead
+!Finclude/crypto/aead.h crypto_free_aead
+!Finclude/crypto/aead.h crypto_aead_ivsize
+!Finclude/crypto/aead.h crypto_aead_authsize
+!Finclude/crypto/aead.h crypto_aead_blocksize
+!Finclude/crypto/aead.h crypto_aead_setkey
+!Finclude/crypto/aead.h crypto_aead_setauthsize
+!Finclude/crypto/aead.h crypto_aead_encrypt
+!Finclude/crypto/aead.h crypto_aead_decrypt
    </sect1>
    <sect1><title>Asynchronous AEAD Request Handle</title>
-!Pinclude/linux/crypto.h Asynchronous AEAD Request Handle
-!Finclude/linux/crypto.h crypto_aead_reqsize
-!Finclude/linux/crypto.h aead_request_set_tfm
-!Finclude/linux/crypto.h aead_request_alloc
-!Finclude/linux/crypto.h aead_request_free
-!Finclude/linux/crypto.h aead_request_set_callback
-!Finclude/linux/crypto.h aead_request_set_crypt
-!Finclude/linux/crypto.h aead_request_set_assoc
+!Pinclude/crypto/aead.h Asynchronous AEAD Request Handle
+!Finclude/crypto/aead.h crypto_aead_reqsize
+!Finclude/crypto/aead.h aead_request_set_tfm
+!Finclude/crypto/aead.h aead_request_alloc
+!Finclude/crypto/aead.h aead_request_free
+!Finclude/crypto/aead.h aead_request_set_callback
+!Finclude/crypto/aead.h aead_request_set_crypt
+!Finclude/crypto/aead.h aead_request_set_assoc
    </sect1>
    <sect1><title>Synchronous Block Cipher API</title>
 !Pinclude/linux/crypto.h Synchronous Block Cipher API
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
index 38988ef..f0d926b 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -1,9 +1,11 @@
-Freescale SoC SEC Security Engines versions 2.x-3.x
+Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
 
 Required properties:
 
 - compatible : Should contain entries for this and backward compatible
-  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0"
+  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3)
+                             e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1)
+    warning: SEC1 and SEC2 are mutually exclusive
 - reg : Offset and length of the register set for the device
 - interrupts : the SEC's interrupt number
 - fsl,num-channels : An integer representing the number of channels
diff --git a/MAINTAINERS b/MAINTAINERS
index 56a432d..912c9d9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4861,8 +4861,11 @@ F:	drivers/crypto/nx/
 IBM Power 842 compression accelerator
 M:	Dan Streetman <ddstreet@us.ibm.com>
 S:	Supported
-F:	drivers/crypto/nx/nx-842.c
+F:	drivers/crypto/nx/nx-842*
 F:	include/linux/nx842.h
+F:	include/linux/sw842.h
+F:	crypto/842.c
+F:	lib/842/
 
 IBM Power Linux RAID adapter
 M:	Brian King <brking@us.ibm.com>
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 8da2207..27ed1b1 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -53,20 +53,13 @@ config CRYPTO_SHA256_ARM
 	  SHA-256 secure hash standard (DFIPS 180-2) implemented
 	  using optimized ARM assembler and NEON, when available.
 
-config CRYPTO_SHA512_ARM_NEON
-	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_SHA512
+config CRYPTO_SHA512_ARM
+	tristate "SHA-384/512 digest algorithm (ARM-asm and NEON)"
 	select CRYPTO_HASH
+	depends on !CPU_V7M
 	help
 	  SHA-512 secure hash standard (DFIPS 180-2) implemented
-	  using ARM NEON instructions, when available.
-
-	  This version of SHA implements a 512 bit hash with 256 bits of
-	  security against collision attacks.
-
-	  This code also includes SHA-384, a 384 bit hash with 192 bits
-	  of security against collision attacks.
+	  using optimized ARM assembler and NEON, when available.
 
 config CRYPTO_AES_ARM
 	tristate "AES cipher algorithms (ARM-asm)"
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 6ea8282..fc51507 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
 obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
 obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
-obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
 
 ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -30,7 +30,8 @@ sha1-arm-y	:= sha1-armv4-large.o sha1_glue.o
 sha1-arm-neon-y	:= sha1-armv7-neon.o sha1_neon_glue.o
 sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
 sha256-arm-y	:= sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
-sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+sha512-arm-y	:= sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
 sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
@@ -45,4 +46,7 @@ $(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
 $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
 	$(call cmd,perl)
 
-.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
+$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
+	$(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S $(obj)/sha512-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index 8cfa468..987aa63 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -101,15 +101,14 @@
 	\dround		q10, q11
 	blo		0f			@ AES-128: 10 rounds
 	vld1.8		{q10-q11}, [ip]!
-	beq		1f			@ AES-192: 12 rounds
 	\dround		q12, q13
+	beq		1f			@ AES-192: 12 rounds
 	vld1.8		{q12-q13}, [ip]
 	\dround		q10, q11
 0:	\fround		q12, q13, q14
 	bx		lr
 
-1:	\dround		q12, q13
-	\fround		q10, q11, q14
+1:	\fround		q10, q11, q14
 	bx		lr
 	.endm
 
@@ -122,8 +121,8 @@
 	 *   q2        : third in/output block (_3x version only)
 	 *   q8        : first round key
 	 *   q9        : secound round key
-	 *   ip        : address of 3rd round key
 	 *   q14       : final round key
+	 *   r2        : address of round key array
 	 *   r3        : number of rounds
 	 */
 	.align		6
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl
new file mode 100644
index 0000000..a2b11a8
--- /dev/null
+++ b/arch/arm/crypto/sha512-armv4.pl
@@ -0,0 +1,649 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA512 block procedure for ARMv4. September 2007.
+
+# This code is ~4.5 (four and a half) times faster than code generated
+# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+# Xscale PXA250 core].
+#
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 6% improvement on
+# Cortex A8 core and ~40 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 7%
+# improvement on Coxtex A8 core and ~38 cycles per byte.
+
+# March 2011.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process
+# one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+# August 2012.
+#
+# Improve NEON performance by 12% on Snapdragon S4. In absolute
+# terms it's 22.6 cycles per byte, which is disappointing result.
+# Technical writers asserted that 3-way S4 pipeline can sustain
+# multiple NEON instructions per cycle, but dual NEON issue could
+# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# for further details. On side note Cortex-A15 processes one byte in
+# 16 cycles.
+
+# Byte order [in]dependence. =========================================
+#
+# Originally caller was expected to maintain specific *dword* order in
+# h[0-7], namely with most significant dword at *lower* address, which
+# was reflected in below two parameters as 0 and 4. Now caller is
+# expected to maintain native byte order for whole 64-bit values.
+$hi="HI";
+$lo="LO";
+# ====================================================================
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0";	# parameter block
+$inp="r1";
+$len="r2";
+
+$Tlo="r3";
+$Thi="r4";
+$Alo="r5";
+$Ahi="r6";
+$Elo="r7";
+$Ehi="r8";
+$t0="r9";
+$t1="r10";
+$t2="r11";
+$t3="r12";
+############	r13 is stack pointer
+$Ktbl="r14";
+############	r15 is program counter
+
+$Aoff=8*0;
+$Boff=8*1;
+$Coff=8*2;
+$Doff=8*3;
+$Eoff=8*4;
+$Foff=8*5;
+$Goff=8*6;
+$Hoff=8*7;
+$Xoff=8*8;
+
+sub BODY_00_15() {
+my $magic = shift;
+$code.=<<___;
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	$t0,$Elo,lsr#14
+	str	$Tlo,[sp,#$Xoff+0]
+	mov	$t1,$Ehi,lsr#14
+	str	$Thi,[sp,#$Xoff+4]
+	eor	$t0,$t0,$Ehi,lsl#18
+	ldr	$t2,[sp,#$Hoff+0]	@ h.lo
+	eor	$t1,$t1,$Elo,lsl#18
+	ldr	$t3,[sp,#$Hoff+4]	@ h.hi
+	eor	$t0,$t0,$Elo,lsr#18
+	eor	$t1,$t1,$Ehi,lsr#18
+	eor	$t0,$t0,$Ehi,lsl#14
+	eor	$t1,$t1,$Elo,lsl#14
+	eor	$t0,$t0,$Ehi,lsr#9
+	eor	$t1,$t1,$Elo,lsr#9
+	eor	$t0,$t0,$Elo,lsl#23
+	eor	$t1,$t1,$Ehi,lsl#23	@ Sigma1(e)
+	adds	$Tlo,$Tlo,$t0
+	ldr	$t0,[sp,#$Foff+0]	@ f.lo
+	adc	$Thi,$Thi,$t1		@ T += Sigma1(e)
+	ldr	$t1,[sp,#$Foff+4]	@ f.hi
+	adds	$Tlo,$Tlo,$t2
+	ldr	$t2,[sp,#$Goff+0]	@ g.lo
+	adc	$Thi,$Thi,$t3		@ T += h
+	ldr	$t3,[sp,#$Goff+4]	@ g.hi
+
+	eor	$t0,$t0,$t2
+	str	$Elo,[sp,#$Eoff+0]
+	eor	$t1,$t1,$t3
+	str	$Ehi,[sp,#$Eoff+4]
+	and	$t0,$t0,$Elo
+	str	$Alo,[sp,#$Aoff+0]
+	and	$t1,$t1,$Ehi
+	str	$Ahi,[sp,#$Aoff+4]
+	eor	$t0,$t0,$t2
+	ldr	$t2,[$Ktbl,#$lo]	@ K[i].lo
+	eor	$t1,$t1,$t3		@ Ch(e,f,g)
+	ldr	$t3,[$Ktbl,#$hi]	@ K[i].hi
+
+	adds	$Tlo,$Tlo,$t0
+	ldr	$Elo,[sp,#$Doff+0]	@ d.lo
+	adc	$Thi,$Thi,$t1		@ T += Ch(e,f,g)
+	ldr	$Ehi,[sp,#$Doff+4]	@ d.hi
+	adds	$Tlo,$Tlo,$t2
+	and	$t0,$t2,#0xff
+	adc	$Thi,$Thi,$t3		@ T += K[i]
+	adds	$Elo,$Elo,$Tlo
+	ldr	$t2,[sp,#$Boff+0]	@ b.lo
+	adc	$Ehi,$Ehi,$Thi		@ d += T
+	teq	$t0,#$magic
+
+	ldr	$t3,[sp,#$Coff+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	$Ktbl,$Ktbl,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	$t0,$Alo,lsr#28
+	mov	$t1,$Ahi,lsr#28
+	eor	$t0,$t0,$Ahi,lsl#4
+	eor	$t1,$t1,$Alo,lsl#4
+	eor	$t0,$t0,$Ahi,lsr#2
+	eor	$t1,$t1,$Alo,lsr#2
+	eor	$t0,$t0,$Alo,lsl#30
+	eor	$t1,$t1,$Ahi,lsl#30
+	eor	$t0,$t0,$Ahi,lsr#7
+	eor	$t1,$t1,$Alo,lsr#7
+	eor	$t0,$t0,$Alo,lsl#25
+	eor	$t1,$t1,$Ahi,lsl#25	@ Sigma0(a)
+	adds	$Tlo,$Tlo,$t0
+	and	$t0,$Alo,$t2
+	adc	$Thi,$Thi,$t1		@ T += Sigma0(a)
+
+	ldr	$t1,[sp,#$Boff+4]	@ b.hi
+	orr	$Alo,$Alo,$t2
+	ldr	$t2,[sp,#$Coff+4]	@ c.hi
+	and	$Alo,$Alo,$t3
+	and	$t3,$Ahi,$t1
+	orr	$Ahi,$Ahi,$t1
+	orr	$Alo,$Alo,$t0		@ Maj(a,b,c).lo
+	and	$Ahi,$Ahi,$t2
+	adds	$Alo,$Alo,$Tlo
+	orr	$Ahi,$Ahi,$t3		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	$Ahi,$Ahi,$Thi		@ h += T
+	tst	$Ktbl,#1
+	add	$Ktbl,$Ktbl,#8
+___
+}
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
+# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K512,%object
+.align	5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size	K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha512_block_data_order
+.skip	32-4
+#else
+.skip	32
+#endif
+
+.global	sha512_block_data_order
+.type	sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha512_block_data_order
+#else
+	adr	r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#1
+	bne	.LNEON
+#endif
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+	stmdb	sp!,{r4-r12,lr}
+	sub	$Ktbl,r3,#672		@ K512
+	sub	sp,sp,#9*8
+
+	ldr	$Elo,[$ctx,#$Eoff+$lo]
+	ldr	$Ehi,[$ctx,#$Eoff+$hi]
+	ldr	$t0, [$ctx,#$Goff+$lo]
+	ldr	$t1, [$ctx,#$Goff+$hi]
+	ldr	$t2, [$ctx,#$Hoff+$lo]
+	ldr	$t3, [$ctx,#$Hoff+$hi]
+.Loop:
+	str	$t0, [sp,#$Goff+0]
+	str	$t1, [sp,#$Goff+4]
+	str	$t2, [sp,#$Hoff+0]
+	str	$t3, [sp,#$Hoff+4]
+	ldr	$Alo,[$ctx,#$Aoff+$lo]
+	ldr	$Ahi,[$ctx,#$Aoff+$hi]
+	ldr	$Tlo,[$ctx,#$Boff+$lo]
+	ldr	$Thi,[$ctx,#$Boff+$hi]
+	ldr	$t0, [$ctx,#$Coff+$lo]
+	ldr	$t1, [$ctx,#$Coff+$hi]
+	ldr	$t2, [$ctx,#$Doff+$lo]
+	ldr	$t3, [$ctx,#$Doff+$hi]
+	str	$Tlo,[sp,#$Boff+0]
+	str	$Thi,[sp,#$Boff+4]
+	str	$t0, [sp,#$Coff+0]
+	str	$t1, [sp,#$Coff+4]
+	str	$t2, [sp,#$Doff+0]
+	str	$t3, [sp,#$Doff+4]
+	ldr	$Tlo,[$ctx,#$Foff+$lo]
+	ldr	$Thi,[$ctx,#$Foff+$hi]
+	str	$Tlo,[sp,#$Foff+0]
+	str	$Thi,[sp,#$Foff+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+	ldrb	$Tlo,[$inp,#7]
+	ldrb	$t0, [$inp,#6]
+	ldrb	$t1, [$inp,#5]
+	ldrb	$t2, [$inp,#4]
+	ldrb	$Thi,[$inp,#3]
+	ldrb	$t3, [$inp,#2]
+	orr	$Tlo,$Tlo,$t0,lsl#8
+	ldrb	$t0, [$inp,#1]
+	orr	$Tlo,$Tlo,$t1,lsl#16
+	ldrb	$t1, [$inp],#8
+	orr	$Tlo,$Tlo,$t2,lsl#24
+	orr	$Thi,$Thi,$t3,lsl#8
+	orr	$Thi,$Thi,$t0,lsl#16
+	orr	$Thi,$Thi,$t1,lsl#24
+#else
+	ldr	$Tlo,[$inp,#4]
+	ldr	$Thi,[$inp],#8
+#ifdef __ARMEL__
+	rev	$Tlo,$Tlo
+	rev	$Thi,$Thi
+#endif
+#endif
+___
+	&BODY_00_15(0x94);
+$code.=<<___;
+	tst	$Ktbl,#1
+	beq	.L00_15
+	ldr	$t0,[sp,#`$Xoff+8*(16-1)`+0]
+	ldr	$t1,[sp,#`$Xoff+8*(16-1)`+4]
+	bic	$Ktbl,$Ktbl,#1
+.L16_79:
+	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
+	mov	$Tlo,$t0,lsr#1
+	ldr	$t2,[sp,#`$Xoff+8*(16-14)`+0]
+	mov	$Thi,$t1,lsr#1
+	ldr	$t3,[sp,#`$Xoff+8*(16-14)`+4]
+	eor	$Tlo,$Tlo,$t1,lsl#31
+	eor	$Thi,$Thi,$t0,lsl#31
+	eor	$Tlo,$Tlo,$t0,lsr#8
+	eor	$Thi,$Thi,$t1,lsr#8
+	eor	$Tlo,$Tlo,$t1,lsl#24
+	eor	$Thi,$Thi,$t0,lsl#24
+	eor	$Tlo,$Tlo,$t0,lsr#7
+	eor	$Thi,$Thi,$t1,lsr#7
+	eor	$Tlo,$Tlo,$t1,lsl#25
+
+	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+	mov	$t0,$t2,lsr#19
+	mov	$t1,$t3,lsr#19
+	eor	$t0,$t0,$t3,lsl#13
+	eor	$t1,$t1,$t2,lsl#13
+	eor	$t0,$t0,$t3,lsr#29
+	eor	$t1,$t1,$t2,lsr#29
+	eor	$t0,$t0,$t2,lsl#3
+	eor	$t1,$t1,$t3,lsl#3
+	eor	$t0,$t0,$t2,lsr#6
+	eor	$t1,$t1,$t3,lsr#6
+	ldr	$t2,[sp,#`$Xoff+8*(16-9)`+0]
+	eor	$t0,$t0,$t3,lsl#26
+
+	ldr	$t3,[sp,#`$Xoff+8*(16-9)`+4]
+	adds	$Tlo,$Tlo,$t0
+	ldr	$t0,[sp,#`$Xoff+8*16`+0]
+	adc	$Thi,$Thi,$t1
+
+	ldr	$t1,[sp,#`$Xoff+8*16`+4]
+	adds	$Tlo,$Tlo,$t2
+	adc	$Thi,$Thi,$t3
+	adds	$Tlo,$Tlo,$t0
+	adc	$Thi,$Thi,$t1
+___
+	&BODY_00_15(0x17);
+$code.=<<___;
+#if __ARM_ARCH__>=7
+	ittt	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	$t0,[sp,#`$Xoff+8*(16-1)`+0]
+	ldreq	$t1,[sp,#`$Xoff+8*(16-1)`+4]
+	beq	.L16_79
+	bic	$Ktbl,$Ktbl,#1
+
+	ldr	$Tlo,[sp,#$Boff+0]
+	ldr	$Thi,[sp,#$Boff+4]
+	ldr	$t0, [$ctx,#$Aoff+$lo]
+	ldr	$t1, [$ctx,#$Aoff+$hi]
+	ldr	$t2, [$ctx,#$Boff+$lo]
+	ldr	$t3, [$ctx,#$Boff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Aoff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Aoff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Boff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Boff+$hi]
+
+	ldr	$Alo,[sp,#$Coff+0]
+	ldr	$Ahi,[sp,#$Coff+4]
+	ldr	$Tlo,[sp,#$Doff+0]
+	ldr	$Thi,[sp,#$Doff+4]
+	ldr	$t0, [$ctx,#$Coff+$lo]
+	ldr	$t1, [$ctx,#$Coff+$hi]
+	ldr	$t2, [$ctx,#$Doff+$lo]
+	ldr	$t3, [$ctx,#$Doff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Coff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Coff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Doff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Doff+$hi]
+
+	ldr	$Tlo,[sp,#$Foff+0]
+	ldr	$Thi,[sp,#$Foff+4]
+	ldr	$t0, [$ctx,#$Eoff+$lo]
+	ldr	$t1, [$ctx,#$Eoff+$hi]
+	ldr	$t2, [$ctx,#$Foff+$lo]
+	ldr	$t3, [$ctx,#$Foff+$hi]
+	adds	$Elo,$Elo,$t0
+	str	$Elo,[$ctx,#$Eoff+$lo]
+	adc	$Ehi,$Ehi,$t1
+	str	$Ehi,[$ctx,#$Eoff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Foff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Foff+$hi]
+
+	ldr	$Alo,[sp,#$Goff+0]
+	ldr	$Ahi,[sp,#$Goff+4]
+	ldr	$Tlo,[sp,#$Hoff+0]
+	ldr	$Thi,[sp,#$Hoff+4]
+	ldr	$t0, [$ctx,#$Goff+$lo]
+	ldr	$t1, [$ctx,#$Goff+$hi]
+	ldr	$t2, [$ctx,#$Hoff+$lo]
+	ldr	$t3, [$ctx,#$Hoff+$hi]
+	adds	$t0,$Alo,$t0
+	str	$t0, [$ctx,#$Goff+$lo]
+	adc	$t1,$Ahi,$t1
+	str	$t1, [$ctx,#$Goff+$hi]
+	adds	$t2,$Tlo,$t2
+	str	$t2, [$ctx,#$Hoff+$lo]
+	adc	$t3,$Thi,$t3
+	str	$t3, [$ctx,#$Hoff+$hi]
+
+	add	sp,sp,#640
+	sub	$Ktbl,$Ktbl,#640
+
+	teq	$inp,$len
+	bne	.Loop
+
+	add	sp,sp,#8*9		@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	bx	lr			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha512_block_data_order,.-sha512_block_data_order
+___
+
+{
+my @Sigma0=(28,34,39);
+my @Sigma1=(14,18,41);
+my @sigma0=(1, 8, 7);
+my @sigma1=(19,61,6);
+
+my $Ktbl="r3";
+my $cnt="r12";	# volatile register known as ip, intra-procedure-call scratch
+
+my @X=map("d$_",(0..15));
+my @V=($A,$B,$C,$D,$E,$F,$G,$H)=map("d$_",(16..23));
+
+sub NEON_00_15() {
+my $i=shift;
+my ($a,$b,$c,$d,$e,$f,$g,$h)=@_;
+my ($t0,$t1,$t2,$T1,$K,$Ch,$Maj)=map("d$_",(24..31));	# temps
+
+$code.=<<___ if ($i<16 || $i&1);
+	vshr.u64	$t0,$e,#@Sigma1[0]	@ $i
+#if $i<16
+	vld1.64		{@X[$i%16]},[$inp]!	@ handles unaligned
+#endif
+	vshr.u64	$t1,$e,#@Sigma1[1]
+#if $i>0
+	 vadd.i64	$a,$Maj			@ h+=Maj from the past
+#endif
+	vshr.u64	$t2,$e,#@Sigma1[2]
+___
+$code.=<<___;
+	vld1.64		{$K},[$Ktbl,:64]!	@ K[i++]
+	vsli.64		$t0,$e,#`64-@Sigma1[0]`
+	vsli.64		$t1,$e,#`64-@Sigma1[1]`
+	vmov		$Ch,$e
+	vsli.64		$t2,$e,#`64-@Sigma1[2]`
+#if $i<16 && defined(__ARMEL__)
+	vrev64.8	@X[$i],@X[$i]
+#endif
+	veor		$t1,$t0
+	vbsl		$Ch,$f,$g		@ Ch(e,f,g)
+	vshr.u64	$t0,$a,#@Sigma0[0]
+	veor		$t2,$t1			@ Sigma1(e)
+	vadd.i64	$T1,$Ch,$h
+	vshr.u64	$t1,$a,#@Sigma0[1]
+	vsli.64		$t0,$a,#`64-@Sigma0[0]`
+	vadd.i64	$T1,$t2
+	vshr.u64	$t2,$a,#@Sigma0[2]
+	vadd.i64	$K,@X[$i%16]
+	vsli.64		$t1,$a,#`64-@Sigma0[1]`
+	veor		$Maj,$a,$b
+	vsli.64		$t2,$a,#`64-@Sigma0[2]`
+	veor		$h,$t0,$t1
+	vadd.i64	$T1,$K
+	vbsl		$Maj,$c,$b		@ Maj(a,b,c)
+	veor		$h,$t2			@ Sigma0(a)
+	vadd.i64	$d,$T1
+	vadd.i64	$Maj,$T1
+	@ vadd.i64	$h,$Maj
+___
+}
+
+sub NEON_16_79() {
+my $i=shift;
+
+if ($i&1)	{ &NEON_00_15($i,@_); return; }
+
+# 2x-vectorized, therefore runs every 2nd round
+my @X=map("q$_",(0..7));			# view @X as 128-bit vector
+my ($t0,$t1,$s0,$s1) = map("q$_",(12..15));	# temps
+my ($d0,$d1,$d2) = map("d$_",(24..26));		# temps from NEON_00_15
+my $e=@_[4];					# $e from NEON_00_15
+$i /= 2;
+$code.=<<___;
+	vshr.u64	$t0,@X[($i+7)%8],#@sigma1[0]
+	vshr.u64	$t1,@X[($i+7)%8],#@sigma1[1]
+	 vadd.i64	@_[0],d30			@ h+=Maj from the past
+	vshr.u64	$s1,@X[($i+7)%8],#@sigma1[2]
+	vsli.64		$t0,@X[($i+7)%8],#`64-@sigma1[0]`
+	vext.8		$s0,@X[$i%8],@X[($i+1)%8],#8	@ X[i+1]
+	vsli.64		$t1,@X[($i+7)%8],#`64-@sigma1[1]`
+	veor		$s1,$t0
+	vshr.u64	$t0,$s0,#@sigma0[0]
+	veor		$s1,$t1				@ sigma1(X[i+14])
+	vshr.u64	$t1,$s0,#@sigma0[1]
+	vadd.i64	@X[$i%8],$s1
+	vshr.u64	$s1,$s0,#@sigma0[2]
+	vsli.64		$t0,$s0,#`64-@sigma0[0]`
+	vsli.64		$t1,$s0,#`64-@sigma0[1]`
+	vext.8		$s0,@X[($i+4)%8],@X[($i+5)%8],#8	@ X[i+9]
+	veor		$s1,$t0
+	vshr.u64	$d0,$e,#@Sigma1[0]		@ from NEON_00_15
+	vadd.i64	@X[$i%8],$s0
+	vshr.u64	$d1,$e,#@Sigma1[1]		@ from NEON_00_15
+	veor		$s1,$t1				@ sigma0(X[i+1])
+	vshr.u64	$d2,$e,#@Sigma1[2]		@ from NEON_00_15
+	vadd.i64	@X[$i%8],$s1
+___
+	&NEON_00_15(2*$i,@_);
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha512_block_data_order_neon
+.type	sha512_block_data_order_neon,%function
+.align	4
+sha512_block_data_order_neon:
+.LNEON:
+	dmb				@ errata #451034 on early Cortex A8
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+	VFP_ABI_PUSH
+	adrl	$Ktbl,K512
+	vldmia	$ctx,{$A-$H}		@ load context
+.Loop_neon:
+___
+for($i=0;$i<16;$i++)	{ &NEON_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	mov		$cnt,#4
+.L16_79_neon:
+	subs		$cnt,#1
+___
+for(;$i<32;$i++)	{ &NEON_16_79($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+	bne		.L16_79_neon
+
+	 vadd.i64	$A,d30		@ h+=Maj from the past
+	vldmia		$ctx,{d24-d31}	@ load context to temp
+	vadd.i64	q8,q12		@ vectorized accumulate
+	vadd.i64	q9,q13
+	vadd.i64	q10,q14
+	vadd.i64	q11,q15
+	vstmia		$ctx,{$A-$H}	@ save context
+	teq		$inp,$len
+	sub		$Ktbl,#640	@ rewind K512
+	bne		.Loop_neon
+
+	VFP_ABI_POP
+	ret				@ bx lr
+.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+___
+}
+$code.=<<___;
+.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm	OPENSSL_armcap_P,4,4
+#endif
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
+$code =~ s/\bret\b/bx	lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+	next if (/^#!/);
+	last if (!s/^#/@/ and !/^$/);
+	print;
+}
+close SELF;
+
+print $code;
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S
deleted file mode 100644
index fe99472..0000000
--- a/arch/arm/crypto/sha512-armv7-neon.S
+++ /dev/null
@@ -1,455 +0,0 @@
-/* sha512-armv7-neon.S  -  ARM/NEON assembly implementation of SHA-512 transform
- *
- * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- */
-
-#include <linux/linkage.h>
-
-
-.syntax unified
-.code   32
-.fpu neon
-
-.text
-
-/* structure of SHA512_CONTEXT */
-#define hd_a 0
-#define hd_b ((hd_a) + 8)
-#define hd_c ((hd_b) + 8)
-#define hd_d ((hd_c) + 8)
-#define hd_e ((hd_d) + 8)
-#define hd_f ((hd_e) + 8)
-#define hd_g ((hd_f) + 8)
-
-/* register macros */
-#define RK %r2
-
-#define RA d0
-#define RB d1
-#define RC d2
-#define RD d3
-#define RE d4
-#define RF d5
-#define RG d6
-#define RH d7
-
-#define RT0 d8
-#define RT1 d9
-#define RT2 d10
-#define RT3 d11
-#define RT4 d12
-#define RT5 d13
-#define RT6 d14
-#define RT7 d15
-
-#define RT01q q4
-#define RT23q q5
-#define RT45q q6
-#define RT67q q7
-
-#define RW0 d16
-#define RW1 d17
-#define RW2 d18
-#define RW3 d19
-#define RW4 d20
-#define RW5 d21
-#define RW6 d22
-#define RW7 d23
-#define RW8 d24
-#define RW9 d25
-#define RW10 d26
-#define RW11 d27
-#define RW12 d28
-#define RW13 d29
-#define RW14 d30
-#define RW15 d31
-
-#define RW01q q8
-#define RW23q q9
-#define RW45q q10
-#define RW67q q11
-#define RW89q q12
-#define RW1011q q13
-#define RW1213q q14
-#define RW1415q q15
-
-/***********************************************************************
- * ARM assembly implementation of sha512 transform
- ***********************************************************************/
-#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
-                     rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
-	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
-	vshr.u64 RT2, re, #14; \
-	vshl.u64 RT3, re, #64 - 14; \
-	interleave_op(arg1); \
-	vshr.u64 RT4, re, #18; \
-	vshl.u64 RT5, re, #64 - 18; \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, re, #41; \
-	vshl.u64 RT5, re, #64 - 41; \
-	vadd.u64 RT0, RT0, rw0; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, re; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, rf, rg; \
-	\
-	vadd.u64 RT1, RT1, rh; \
-	vshr.u64 RT2, ra, #28; \
-	vshl.u64 RT3, ra, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, ra, #34; \
-	vshl.u64 RT5, ra, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* h = Sum0 (a) + Maj (a, b, c); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, ra, #39; \
-	vshl.u64 RT5, ra, #64 - 39; \
-	veor.64 RT0, ra, rb; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rc, rb; \
-	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
-	veor.64 rh, RT2, RT3; \
-	\
-	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
-	vshr.u64 RT2, rd, #14; \
-	vshl.u64 RT3, rd, #64 - 14; \
-	vadd.u64 rh, rh, RT0; \
-	vshr.u64 RT4, rd, #18; \
-	vshl.u64 RT5, rd, #64 - 18; \
-	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rd, #41; \
-	vshl.u64 RT5, rd, #64 - 41; \
-	vadd.u64 RT0, RT0, rw1; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, rd; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, re, rf; \
-	\
-	vadd.u64 RT1, RT1, rg; \
-	vshr.u64 RT2, rh, #28; \
-	vshl.u64 RT3, rh, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, rh, #34; \
-	vshl.u64 RT5, rh, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* g = Sum0 (h) + Maj (h, a, b); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rh, #39; \
-	vshl.u64 RT5, rh, #64 - 39; \
-	veor.64 RT0, rh, ra; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rb, ra; \
-	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
-	veor.64 rg, RT2, RT3; \
-	\
-	/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
-	/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
-	\
-	/**** S0(w[1:2]) */ \
-	\
-	/* w[0:1] += w[9:10] */ \
-	/* RT23q = rw1:rw2 */ \
-	vext.u64 RT23q, rw01q, rw23q, #1; \
-	vadd.u64 rw0, rw9; \
-	vadd.u64 rg, rg, RT0; \
-	vadd.u64 rw1, rw10;\
-	vadd.u64 rg, rg, RT1; /* g+=t1; */ \
-	\
-	vshr.u64 RT45q, RT23q, #1; \
-	vshl.u64 RT67q, RT23q, #64 - 1; \
-	vshr.u64 RT01q, RT23q, #8; \
-	veor.u64 RT45q, RT45q, RT67q; \
-	vshl.u64 RT67q, RT23q, #64 - 8; \
-	veor.u64 RT45q, RT45q, RT01q; \
-	vshr.u64 RT01q, RT23q, #7; \
-	veor.u64 RT45q, RT45q, RT67q; \
-	\
-	/**** S1(w[14:15]) */ \
-	vshr.u64 RT23q, rw1415q, #6; \
-	veor.u64 RT01q, RT01q, RT45q; \
-	vshr.u64 RT45q, rw1415q, #19; \
-	vshl.u64 RT67q, rw1415q, #64 - 19; \
-	veor.u64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT45q, rw1415q, #61; \
-	veor.u64 RT23q, RT23q, RT67q; \
-	vshl.u64 RT67q, rw1415q, #64 - 61; \
-	veor.u64 RT23q, RT23q, RT45q; \
-	vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
-	veor.u64 RT01q, RT23q, RT67q;
-#define vadd_RT01q(rw01q) \
-	/* w[0:1] += S(w[14:15]) */ \
-	vadd.u64 rw01q, RT01q;
-
-#define dummy(_) /*_*/
-
-#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
-	              interleave_op1, arg1, interleave_op2, arg2) \
-	/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
-	vshr.u64 RT2, re, #14; \
-	vshl.u64 RT3, re, #64 - 14; \
-	interleave_op1(arg1); \
-	vshr.u64 RT4, re, #18; \
-	vshl.u64 RT5, re, #64 - 18; \
-	interleave_op2(arg2); \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, re, #41; \
-	vshl.u64 RT5, re, #64 - 41; \
-	vadd.u64 RT0, RT0, rw0; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, re; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, rf, rg; \
-	\
-	vadd.u64 RT1, RT1, rh; \
-	vshr.u64 RT2, ra, #28; \
-	vshl.u64 RT3, ra, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, ra, #34; \
-	vshl.u64 RT5, ra, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* h = Sum0 (a) + Maj (a, b, c); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, ra, #39; \
-	vshl.u64 RT5, ra, #64 - 39; \
-	veor.64 RT0, ra, rb; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rc, rb; \
-	vadd.u64 rd, rd, RT1; /* d+=t1; */ \
-	veor.64 rh, RT2, RT3; \
-	\
-	/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
-	vshr.u64 RT2, rd, #14; \
-	vshl.u64 RT3, rd, #64 - 14; \
-	vadd.u64 rh, rh, RT0; \
-	vshr.u64 RT4, rd, #18; \
-	vshl.u64 RT5, rd, #64 - 18; \
-	vadd.u64 rh, rh, RT1; /* h+=t1; */ \
-	vld1.64 {RT0}, [RK]!; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rd, #41; \
-	vshl.u64 RT5, rd, #64 - 41; \
-	vadd.u64 RT0, RT0, rw1; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vmov.64 RT7, rd; \
-	veor.64 RT1, RT2, RT3; \
-	vbsl.64 RT7, re, rf; \
-	\
-	vadd.u64 RT1, RT1, rg; \
-	vshr.u64 RT2, rh, #28; \
-	vshl.u64 RT3, rh, #64 - 28; \
-	vadd.u64 RT1, RT1, RT0; \
-	vshr.u64 RT4, rh, #34; \
-	vshl.u64 RT5, rh, #64 - 34; \
-	vadd.u64 RT1, RT1, RT7; \
-	\
-	/* g = Sum0 (h) + Maj (h, a, b); */ \
-	veor.64 RT23q, RT23q, RT45q; \
-	vshr.u64 RT4, rh, #39; \
-	vshl.u64 RT5, rh, #64 - 39; \
-	veor.64 RT0, rh, ra; \
-	veor.64 RT23q, RT23q, RT45q; \
-	vbsl.64 RT0, rb, ra; \
-	vadd.u64 rc, rc, RT1; /* c+=t1; */ \
-	veor.64 rg, RT2, RT3;
-#define vadd_rg_RT0(rg) \
-	vadd.u64 rg, rg, RT0;
-#define vadd_rg_RT1(rg) \
-	vadd.u64 rg, rg, RT1; /* g+=t1; */
-
-.align 3
-ENTRY(sha512_transform_neon)
-	/* Input:
-	 *	%r0: SHA512_CONTEXT
-	 *	%r1: data
-	 *	%r2: u64 k[] constants
-	 *	%r3: nblks
-	 */
-	push {%lr};
-
-	mov %lr, #0;
-
-	/* Load context to d0-d7 */
-	vld1.64 {RA-RD}, [%r0]!;
-	vld1.64 {RE-RH}, [%r0];
-	sub %r0, #(4*8);
-
-	/* Load input to w[16], d16-d31 */
-	/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
-	vld1.64 {RW0-RW3}, [%r1]!;
-	vld1.64 {RW4-RW7}, [%r1]!;
-	vld1.64 {RW8-RW11}, [%r1]!;
-	vld1.64 {RW12-RW15}, [%r1]!;
-#ifdef __ARMEL__
-	/* byteswap */
-	vrev64.8 RW01q, RW01q;
-	vrev64.8 RW23q, RW23q;
-	vrev64.8 RW45q, RW45q;
-	vrev64.8 RW67q, RW67q;
-	vrev64.8 RW89q, RW89q;
-	vrev64.8 RW1011q, RW1011q;
-	vrev64.8 RW1213q, RW1213q;
-	vrev64.8 RW1415q, RW1415q;
-#endif
-
-	/* EABI says that d8-d15 must be preserved by callee. */
-	/*vpush {RT0-RT7};*/
-
-.Loop:
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
-		     RW23q, RW1415q, RW9, RW10, dummy, _);
-	b .Lenter_rounds;
-
-.Loop_rounds:
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
-		     RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
-.Lenter_rounds:
-	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
-		     RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
-	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
-		     RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
-	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
-		     RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
-	rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
-		     RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
-	rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
-		     RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
-	add %lr, #16;
-	rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
-		     RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
-	cmp %lr, #64;
-	rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
-		     RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
-	bne .Loop_rounds;
-
-	subs %r3, #1;
-
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
-		      vadd_RT01q, RW1415q, dummy, _);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-	beq .Lhandle_tail;
-	vld1.64 {RW0-RW3}, [%r1]!;
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
-	vrev64.8 RW01q, RW01q;
-	vrev64.8 RW23q, RW23q;
-#endif
-	vld1.64 {RW4-RW7}, [%r1]!;
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
-		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-#ifdef __ARMEL__
-	vrev64.8 RW45q, RW45q;
-	vrev64.8 RW67q, RW67q;
-#endif
-	vld1.64 {RW8-RW11}, [%r1]!;
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-#ifdef __ARMEL__
-	vrev64.8 RW89q, RW89q;
-	vrev64.8 RW1011q, RW1011q;
-#endif
-	vld1.64 {RW12-RW15}, [%r1]!;
-	vadd_rg_RT0(RA);
-	vadd_rg_RT1(RA);
-
-	/* Load context */
-	vld1.64 {RT0-RT3}, [%r0]!;
-	vld1.64 {RT4-RT7}, [%r0];
-	sub %r0, #(4*8);
-
-#ifdef __ARMEL__
-	vrev64.8 RW1213q, RW1213q;
-	vrev64.8 RW1415q, RW1415q;
-#endif
-
-	vadd.u64 RA, RT0;
-	vadd.u64 RB, RT1;
-	vadd.u64 RC, RT2;
-	vadd.u64 RD, RT3;
-	vadd.u64 RE, RT4;
-	vadd.u64 RF, RT5;
-	vadd.u64 RG, RT6;
-	vadd.u64 RH, RT7;
-
-	/* Store the first half of context */
-	vst1.64 {RA-RD}, [%r0]!;
-	sub RK, $(8*80);
-	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
-	mov %lr, #0;
-	sub %r0, #(4*8);
-
-	b .Loop;
-
-.Lhandle_tail:
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-	rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
-		      vadd_rg_RT0, RA, vadd_rg_RT1, RA);
-	rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
-		      vadd_rg_RT0, RG, vadd_rg_RT1, RG);
-	rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
-		      vadd_rg_RT0, RE, vadd_rg_RT1, RE);
-	rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
-		      vadd_rg_RT0, RC, vadd_rg_RT1, RC);
-
-	/* Load context to d16-d23 */
-	vld1.64 {RW0-RW3}, [%r0]!;
-	vadd_rg_RT0(RA);
-	vld1.64 {RW4-RW7}, [%r0];
-	vadd_rg_RT1(RA);
-	sub %r0, #(4*8);
-
-	vadd.u64 RA, RW0;
-	vadd.u64 RB, RW1;
-	vadd.u64 RC, RW2;
-	vadd.u64 RD, RW3;
-	vadd.u64 RE, RW4;
-	vadd.u64 RF, RW5;
-	vadd.u64 RG, RW6;
-	vadd.u64 RH, RW7;
-
-	/* Store the first half of context */
-	vst1.64 {RA-RD}, [%r0]!;
-
-	/* Clear used registers */
-	/* d16-d31 */
-	veor.u64 RW01q, RW01q;
-	veor.u64 RW23q, RW23q;
-	veor.u64 RW45q, RW45q;
-	veor.u64 RW67q, RW67q;
-	vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
-	veor.u64 RW89q, RW89q;
-	veor.u64 RW1011q, RW1011q;
-	veor.u64 RW1213q, RW1213q;
-	veor.u64 RW1415q, RW1415q;
-	/* d8-d15 */
-	/*vpop {RT0-RT7};*/
-	/* d0-d7 (q0-q3) */
-	veor.u64 %q0, %q0;
-	veor.u64 %q1, %q1;
-	veor.u64 %q2, %q2;
-	veor.u64 %q3, %q3;
-
-	pop {%pc};
-ENDPROC(sha512_transform_neon)
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
new file mode 100644
index 0000000..3694c4d
--- /dev/null
+++ b/arch/arm/crypto/sha512-core.S_shipped
@@ -0,0 +1,1861 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA512 block procedure for ARMv4. September 2007.
+
+@ This code is ~4.5 (four and a half) times faster than code generated
+@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+@ Xscale PXA250 core].
+@
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
+@ Cortex A8 core and ~40 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 7%
+@ improvement on Coxtex A8 core and ~38 cycles per byte.
+
+@ March 2011.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process
+@ one byte in 23.3 cycles or ~60% faster than integer-only code.
+
+@ August 2012.
+@
+@ Improve NEON performance by 12% on Snapdragon S4. In absolute
+@ terms it's 22.6 cycles per byte, which is disappointing result.
+@ Technical writers asserted that 3-way S4 pipeline can sustain
+@ multiple NEON instructions per cycle, but dual NEON issue could
+@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+@ for further details. On side note Cortex-A15 processes one byte in
+@ 16 cycles.
+
+@ Byte order [in]dependence. =========================================
+@
+@ Originally caller was expected to maintain specific *dword* order in
+@ h[0-7], namely with most significant dword at *lower* address, which
+@ was reflected in below two parameters as 0 and 4. Now caller is
+@ expected to maintain native byte order for whole 64-bit values.
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
+# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
+#ifdef __ARMEL__
+# define LO 0
+# define HI 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+#else
+# define HI 0
+# define LO 4
+# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code	32
+#else
+.syntax unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code   32
+# endif
+#endif
+
+.type	K512,%object
+.align	5
+K512:
+WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
+WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
+WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
+WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
+WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
+WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
+WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
+WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
+WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
+WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
+WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
+WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
+WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
+WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
+WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
+WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
+WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
+WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
+WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
+WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
+WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
+WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
+WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
+WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
+WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
+WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
+WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
+WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
+WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
+WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
+WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
+WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
+WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
+WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
+WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
+WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
+WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
+WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
+WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
+WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
+.size	K512,.-K512
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word	OPENSSL_armcap_P-sha512_block_data_order
+.skip	32-4
+#else
+.skip	32
+#endif
+
+.global	sha512_block_data_order
+.type	sha512_block_data_order,%function
+sha512_block_data_order:
+#if __ARM_ARCH__<7
+	sub	r3,pc,#8		@ sha512_block_data_order
+#else
+	adr	r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+	ldr	r12,.LOPENSSL_armcap
+	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
+	tst	r12,#1
+	bne	.LNEON
+#endif
+	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
+	stmdb	sp!,{r4-r12,lr}
+	sub	r14,r3,#672		@ K512
+	sub	sp,sp,#9*8
+
+	ldr	r7,[r0,#32+LO]
+	ldr	r8,[r0,#32+HI]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+.Loop:
+	str	r9, [sp,#48+0]
+	str	r10, [sp,#48+4]
+	str	r11, [sp,#56+0]
+	str	r12, [sp,#56+4]
+	ldr	r5,[r0,#0+LO]
+	ldr	r6,[r0,#0+HI]
+	ldr	r3,[r0,#8+LO]
+	ldr	r4,[r0,#8+HI]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	str	r3,[sp,#8+0]
+	str	r4,[sp,#8+4]
+	str	r9, [sp,#16+0]
+	str	r10, [sp,#16+4]
+	str	r11, [sp,#24+0]
+	str	r12, [sp,#24+4]
+	ldr	r3,[r0,#40+LO]
+	ldr	r4,[r0,#40+HI]
+	str	r3,[sp,#40+0]
+	str	r4,[sp,#40+4]
+
+.L00_15:
+#if __ARM_ARCH__<7
+	ldrb	r3,[r1,#7]
+	ldrb	r9, [r1,#6]
+	ldrb	r10, [r1,#5]
+	ldrb	r11, [r1,#4]
+	ldrb	r4,[r1,#3]
+	ldrb	r12, [r1,#2]
+	orr	r3,r3,r9,lsl#8
+	ldrb	r9, [r1,#1]
+	orr	r3,r3,r10,lsl#16
+	ldrb	r10, [r1],#8
+	orr	r3,r3,r11,lsl#24
+	orr	r4,r4,r12,lsl#8
+	orr	r4,r4,r9,lsl#16
+	orr	r4,r4,r10,lsl#24
+#else
+	ldr	r3,[r1,#4]
+	ldr	r4,[r1],#8
+#ifdef __ARMEL__
+	rev	r3,r3
+	rev	r4,r4
+#endif
+#endif
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#148
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+	tst	r14,#1
+	beq	.L00_15
+	ldr	r9,[sp,#184+0]
+	ldr	r10,[sp,#184+4]
+	bic	r14,r14,#1
+.L16_79:
+	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
+	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
+	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
+	mov	r3,r9,lsr#1
+	ldr	r11,[sp,#80+0]
+	mov	r4,r10,lsr#1
+	ldr	r12,[sp,#80+4]
+	eor	r3,r3,r10,lsl#31
+	eor	r4,r4,r9,lsl#31
+	eor	r3,r3,r9,lsr#8
+	eor	r4,r4,r10,lsr#8
+	eor	r3,r3,r10,lsl#24
+	eor	r4,r4,r9,lsl#24
+	eor	r3,r3,r9,lsr#7
+	eor	r4,r4,r10,lsr#7
+	eor	r3,r3,r10,lsl#25
+
+	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
+	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
+	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
+	mov	r9,r11,lsr#19
+	mov	r10,r12,lsr#19
+	eor	r9,r9,r12,lsl#13
+	eor	r10,r10,r11,lsl#13
+	eor	r9,r9,r12,lsr#29
+	eor	r10,r10,r11,lsr#29
+	eor	r9,r9,r11,lsl#3
+	eor	r10,r10,r12,lsl#3
+	eor	r9,r9,r11,lsr#6
+	eor	r10,r10,r12,lsr#6
+	ldr	r11,[sp,#120+0]
+	eor	r9,r9,r12,lsl#26
+
+	ldr	r12,[sp,#120+4]
+	adds	r3,r3,r9
+	ldr	r9,[sp,#192+0]
+	adc	r4,r4,r10
+
+	ldr	r10,[sp,#192+4]
+	adds	r3,r3,r11
+	adc	r4,r4,r12
+	adds	r3,r3,r9
+	adc	r4,r4,r10
+	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
+	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
+	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
+	mov	r9,r7,lsr#14
+	str	r3,[sp,#64+0]
+	mov	r10,r8,lsr#14
+	str	r4,[sp,#64+4]
+	eor	r9,r9,r8,lsl#18
+	ldr	r11,[sp,#56+0]	@ h.lo
+	eor	r10,r10,r7,lsl#18
+	ldr	r12,[sp,#56+4]	@ h.hi
+	eor	r9,r9,r7,lsr#18
+	eor	r10,r10,r8,lsr#18
+	eor	r9,r9,r8,lsl#14
+	eor	r10,r10,r7,lsl#14
+	eor	r9,r9,r8,lsr#9
+	eor	r10,r10,r7,lsr#9
+	eor	r9,r9,r7,lsl#23
+	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
+	adds	r3,r3,r9
+	ldr	r9,[sp,#40+0]	@ f.lo
+	adc	r4,r4,r10		@ T += Sigma1(e)
+	ldr	r10,[sp,#40+4]	@ f.hi
+	adds	r3,r3,r11
+	ldr	r11,[sp,#48+0]	@ g.lo
+	adc	r4,r4,r12		@ T += h
+	ldr	r12,[sp,#48+4]	@ g.hi
+
+	eor	r9,r9,r11
+	str	r7,[sp,#32+0]
+	eor	r10,r10,r12
+	str	r8,[sp,#32+4]
+	and	r9,r9,r7
+	str	r5,[sp,#0+0]
+	and	r10,r10,r8
+	str	r6,[sp,#0+4]
+	eor	r9,r9,r11
+	ldr	r11,[r14,#LO]	@ K[i].lo
+	eor	r10,r10,r12		@ Ch(e,f,g)
+	ldr	r12,[r14,#HI]	@ K[i].hi
+
+	adds	r3,r3,r9
+	ldr	r7,[sp,#24+0]	@ d.lo
+	adc	r4,r4,r10		@ T += Ch(e,f,g)
+	ldr	r8,[sp,#24+4]	@ d.hi
+	adds	r3,r3,r11
+	and	r9,r11,#0xff
+	adc	r4,r4,r12		@ T += K[i]
+	adds	r7,r7,r3
+	ldr	r11,[sp,#8+0]	@ b.lo
+	adc	r8,r8,r4		@ d += T
+	teq	r9,#23
+
+	ldr	r12,[sp,#16+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	orreq	r14,r14,#1
+	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
+	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
+	mov	r9,r5,lsr#28
+	mov	r10,r6,lsr#28
+	eor	r9,r9,r6,lsl#4
+	eor	r10,r10,r5,lsl#4
+	eor	r9,r9,r6,lsr#2
+	eor	r10,r10,r5,lsr#2
+	eor	r9,r9,r5,lsl#30
+	eor	r10,r10,r6,lsl#30
+	eor	r9,r9,r6,lsr#7
+	eor	r10,r10,r5,lsr#7
+	eor	r9,r9,r5,lsl#25
+	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
+	adds	r3,r3,r9
+	and	r9,r5,r11
+	adc	r4,r4,r10		@ T += Sigma0(a)
+
+	ldr	r10,[sp,#8+4]	@ b.hi
+	orr	r5,r5,r11
+	ldr	r11,[sp,#16+4]	@ c.hi
+	and	r5,r5,r12
+	and	r12,r6,r10
+	orr	r6,r6,r10
+	orr	r5,r5,r9		@ Maj(a,b,c).lo
+	and	r6,r6,r11
+	adds	r5,r5,r3
+	orr	r6,r6,r12		@ Maj(a,b,c).hi
+	sub	sp,sp,#8
+	adc	r6,r6,r4		@ h += T
+	tst	r14,#1
+	add	r14,r14,#8
+#if __ARM_ARCH__>=7
+	ittt	eq			@ Thumb2 thing, sanity check in ARM
+#endif
+	ldreq	r9,[sp,#184+0]
+	ldreq	r10,[sp,#184+4]
+	beq	.L16_79
+	bic	r14,r14,#1
+
+	ldr	r3,[sp,#8+0]
+	ldr	r4,[sp,#8+4]
+	ldr	r9, [r0,#0+LO]
+	ldr	r10, [r0,#0+HI]
+	ldr	r11, [r0,#8+LO]
+	ldr	r12, [r0,#8+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#0+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#0+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#8+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#8+HI]
+
+	ldr	r5,[sp,#16+0]
+	ldr	r6,[sp,#16+4]
+	ldr	r3,[sp,#24+0]
+	ldr	r4,[sp,#24+4]
+	ldr	r9, [r0,#16+LO]
+	ldr	r10, [r0,#16+HI]
+	ldr	r11, [r0,#24+LO]
+	ldr	r12, [r0,#24+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#16+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#16+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#24+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#24+HI]
+
+	ldr	r3,[sp,#40+0]
+	ldr	r4,[sp,#40+4]
+	ldr	r9, [r0,#32+LO]
+	ldr	r10, [r0,#32+HI]
+	ldr	r11, [r0,#40+LO]
+	ldr	r12, [r0,#40+HI]
+	adds	r7,r7,r9
+	str	r7,[r0,#32+LO]
+	adc	r8,r8,r10
+	str	r8,[r0,#32+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#40+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#40+HI]
+
+	ldr	r5,[sp,#48+0]
+	ldr	r6,[sp,#48+4]
+	ldr	r3,[sp,#56+0]
+	ldr	r4,[sp,#56+4]
+	ldr	r9, [r0,#48+LO]
+	ldr	r10, [r0,#48+HI]
+	ldr	r11, [r0,#56+LO]
+	ldr	r12, [r0,#56+HI]
+	adds	r9,r5,r9
+	str	r9, [r0,#48+LO]
+	adc	r10,r6,r10
+	str	r10, [r0,#48+HI]
+	adds	r11,r3,r11
+	str	r11, [r0,#56+LO]
+	adc	r12,r4,r12
+	str	r12, [r0,#56+HI]
+
+	add	sp,sp,#640
+	sub	r14,r14,#640
+
+	teq	r1,r2
+	bne	.Loop
+
+	add	sp,sp,#8*9		@ destroy frame
+#if __ARM_ARCH__>=5
+	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr			@ be binary compatible with V4, yet
+	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
+#endif
+.size	sha512_block_data_order,.-sha512_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch	armv7-a
+.fpu	neon
+
+.global	sha512_block_data_order_neon
+.type	sha512_block_data_order_neon,%function
+.align	4
+sha512_block_data_order_neon:
+.LNEON:
+	dmb				@ errata #451034 on early Cortex A8
+	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
+	VFP_ABI_PUSH
+	adrl	r3,K512
+	vldmia	r0,{d16-d23}		@ load context
+.Loop_neon:
+	vshr.u64	d24,d20,#14	@ 0
+#if 0<16
+	vld1.64		{d0},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+#if 0>0
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 0<16 && defined(__ARMEL__)
+	vrev64.8	d0,d0
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d0
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 1
+#if 1<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 1>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 1<16 && defined(__ARMEL__)
+	vrev64.8	d1,d1
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d1
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 2
+#if 2<16
+	vld1.64		{d2},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+#if 2>0
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 2<16 && defined(__ARMEL__)
+	vrev64.8	d2,d2
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d2
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 3
+#if 3<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 3>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 3<16 && defined(__ARMEL__)
+	vrev64.8	d3,d3
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d3
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 4
+#if 4<16
+	vld1.64		{d4},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+#if 4>0
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 4<16 && defined(__ARMEL__)
+	vrev64.8	d4,d4
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d4
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 5
+#if 5<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 5>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 5<16 && defined(__ARMEL__)
+	vrev64.8	d5,d5
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d5
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 6
+#if 6<16
+	vld1.64		{d6},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+#if 6>0
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 6<16 && defined(__ARMEL__)
+	vrev64.8	d6,d6
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d6
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 7
+#if 7<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 7>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 7<16 && defined(__ARMEL__)
+	vrev64.8	d7,d7
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d7
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	vshr.u64	d24,d20,#14	@ 8
+#if 8<16
+	vld1.64		{d8},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d20,#18
+#if 8>0
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d20,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 8<16 && defined(__ARMEL__)
+	vrev64.8	d8,d8
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d8
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 9
+#if 9<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 9>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 9<16 && defined(__ARMEL__)
+	vrev64.8	d9,d9
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d9
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	d24,d18,#14	@ 10
+#if 10<16
+	vld1.64		{d10},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d18,#18
+#if 10>0
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d18,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 10<16 && defined(__ARMEL__)
+	vrev64.8	d10,d10
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d10
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 11
+#if 11<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 11>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 11<16 && defined(__ARMEL__)
+	vrev64.8	d11,d11
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d11
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	d24,d16,#14	@ 12
+#if 12<16
+	vld1.64		{d12},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d16,#18
+#if 12>0
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d16,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 12<16 && defined(__ARMEL__)
+	vrev64.8	d12,d12
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d12
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 13
+#if 13<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 13>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 13<16 && defined(__ARMEL__)
+	vrev64.8	d13,d13
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d13
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	d24,d22,#14	@ 14
+#if 14<16
+	vld1.64		{d14},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d22,#18
+#if 14>0
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d22,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 14<16 && defined(__ARMEL__)
+	vrev64.8	d14,d14
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d14
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 15
+#if 15<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 15>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 15<16 && defined(__ARMEL__)
+	vrev64.8	d15,d15
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d15
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	mov		r12,#4
+.L16_79_neon:
+	subs		r12,#1
+	vshr.u64	q12,q7,#19
+	vshr.u64	q13,q7,#61
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+	vshr.u64	q15,q7,#6
+	vsli.64		q12,q7,#45
+	vext.8		q14,q0,q1,#8	@ X[i+1]
+	vsli.64		q13,q7,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q0,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q4,q5,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q0,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q0,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 16<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d0
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 17
+#if 17<16
+	vld1.64		{d1},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 17>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 17<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d1
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	q12,q0,#19
+	vshr.u64	q13,q0,#61
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+	vshr.u64	q15,q0,#6
+	vsli.64		q12,q0,#45
+	vext.8		q14,q1,q2,#8	@ X[i+1]
+	vsli.64		q13,q0,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q1,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q5,q6,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q1,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q1,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 18<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d2
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 19
+#if 19<16
+	vld1.64		{d3},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 19>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 19<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d3
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	q12,q1,#19
+	vshr.u64	q13,q1,#61
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+	vshr.u64	q15,q1,#6
+	vsli.64		q12,q1,#45
+	vext.8		q14,q2,q3,#8	@ X[i+1]
+	vsli.64		q13,q1,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q2,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q6,q7,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q2,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q2,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 20<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d4
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 21
+#if 21<16
+	vld1.64		{d5},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 21>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 21<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d5
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	q12,q2,#19
+	vshr.u64	q13,q2,#61
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+	vshr.u64	q15,q2,#6
+	vsli.64		q12,q2,#45
+	vext.8		q14,q3,q4,#8	@ X[i+1]
+	vsli.64		q13,q2,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q3,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q7,q0,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q3,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q3,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 22<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d6
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 23
+#if 23<16
+	vld1.64		{d7},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 23>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 23<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d7
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	vshr.u64	q12,q3,#19
+	vshr.u64	q13,q3,#61
+	 vadd.i64	d16,d30			@ h+=Maj from the past
+	vshr.u64	q15,q3,#6
+	vsli.64		q12,q3,#45
+	vext.8		q14,q4,q5,#8	@ X[i+1]
+	vsli.64		q13,q3,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q4,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q0,q1,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d20,#14		@ from NEON_00_15
+	vadd.i64	q4,q14
+	vshr.u64	d25,d20,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d20,#41		@ from NEON_00_15
+	vadd.i64	q4,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d20,#50
+	vsli.64		d25,d20,#46
+	vmov		d29,d20
+	vsli.64		d26,d20,#23
+#if 24<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d21,d22		@ Ch(e,f,g)
+	vshr.u64	d24,d16,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d23
+	vshr.u64	d25,d16,#34
+	vsli.64		d24,d16,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d16,#39
+	vadd.i64	d28,d8
+	vsli.64		d25,d16,#30
+	veor		d30,d16,d17
+	vsli.64		d26,d16,#25
+	veor		d23,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d18,d17		@ Maj(a,b,c)
+	veor		d23,d26			@ Sigma0(a)
+	vadd.i64	d19,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d23,d30
+	vshr.u64	d24,d19,#14	@ 25
+#if 25<16
+	vld1.64		{d9},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d19,#18
+#if 25>0
+	 vadd.i64	d23,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d19,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d19,#50
+	vsli.64		d25,d19,#46
+	vmov		d29,d19
+	vsli.64		d26,d19,#23
+#if 25<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d20,d21		@ Ch(e,f,g)
+	vshr.u64	d24,d23,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d22
+	vshr.u64	d25,d23,#34
+	vsli.64		d24,d23,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d23,#39
+	vadd.i64	d28,d9
+	vsli.64		d25,d23,#30
+	veor		d30,d23,d16
+	vsli.64		d26,d23,#25
+	veor		d22,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d17,d16		@ Maj(a,b,c)
+	veor		d22,d26			@ Sigma0(a)
+	vadd.i64	d18,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d22,d30
+	vshr.u64	q12,q4,#19
+	vshr.u64	q13,q4,#61
+	 vadd.i64	d22,d30			@ h+=Maj from the past
+	vshr.u64	q15,q4,#6
+	vsli.64		q12,q4,#45
+	vext.8		q14,q5,q6,#8	@ X[i+1]
+	vsli.64		q13,q4,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q5,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q1,q2,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d18,#14		@ from NEON_00_15
+	vadd.i64	q5,q14
+	vshr.u64	d25,d18,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d18,#41		@ from NEON_00_15
+	vadd.i64	q5,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d18,#50
+	vsli.64		d25,d18,#46
+	vmov		d29,d18
+	vsli.64		d26,d18,#23
+#if 26<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d19,d20		@ Ch(e,f,g)
+	vshr.u64	d24,d22,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d21
+	vshr.u64	d25,d22,#34
+	vsli.64		d24,d22,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d22,#39
+	vadd.i64	d28,d10
+	vsli.64		d25,d22,#30
+	veor		d30,d22,d23
+	vsli.64		d26,d22,#25
+	veor		d21,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d16,d23		@ Maj(a,b,c)
+	veor		d21,d26			@ Sigma0(a)
+	vadd.i64	d17,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d21,d30
+	vshr.u64	d24,d17,#14	@ 27
+#if 27<16
+	vld1.64		{d11},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d17,#18
+#if 27>0
+	 vadd.i64	d21,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d17,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d17,#50
+	vsli.64		d25,d17,#46
+	vmov		d29,d17
+	vsli.64		d26,d17,#23
+#if 27<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d18,d19		@ Ch(e,f,g)
+	vshr.u64	d24,d21,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d20
+	vshr.u64	d25,d21,#34
+	vsli.64		d24,d21,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d21,#39
+	vadd.i64	d28,d11
+	vsli.64		d25,d21,#30
+	veor		d30,d21,d22
+	vsli.64		d26,d21,#25
+	veor		d20,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d23,d22		@ Maj(a,b,c)
+	veor		d20,d26			@ Sigma0(a)
+	vadd.i64	d16,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d20,d30
+	vshr.u64	q12,q5,#19
+	vshr.u64	q13,q5,#61
+	 vadd.i64	d20,d30			@ h+=Maj from the past
+	vshr.u64	q15,q5,#6
+	vsli.64		q12,q5,#45
+	vext.8		q14,q6,q7,#8	@ X[i+1]
+	vsli.64		q13,q5,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q6,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q2,q3,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d16,#14		@ from NEON_00_15
+	vadd.i64	q6,q14
+	vshr.u64	d25,d16,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d16,#41		@ from NEON_00_15
+	vadd.i64	q6,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d16,#50
+	vsli.64		d25,d16,#46
+	vmov		d29,d16
+	vsli.64		d26,d16,#23
+#if 28<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d17,d18		@ Ch(e,f,g)
+	vshr.u64	d24,d20,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d19
+	vshr.u64	d25,d20,#34
+	vsli.64		d24,d20,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d20,#39
+	vadd.i64	d28,d12
+	vsli.64		d25,d20,#30
+	veor		d30,d20,d21
+	vsli.64		d26,d20,#25
+	veor		d19,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d22,d21		@ Maj(a,b,c)
+	veor		d19,d26			@ Sigma0(a)
+	vadd.i64	d23,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d19,d30
+	vshr.u64	d24,d23,#14	@ 29
+#if 29<16
+	vld1.64		{d13},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d23,#18
+#if 29>0
+	 vadd.i64	d19,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d23,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d23,#50
+	vsli.64		d25,d23,#46
+	vmov		d29,d23
+	vsli.64		d26,d23,#23
+#if 29<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d16,d17		@ Ch(e,f,g)
+	vshr.u64	d24,d19,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d18
+	vshr.u64	d25,d19,#34
+	vsli.64		d24,d19,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d19,#39
+	vadd.i64	d28,d13
+	vsli.64		d25,d19,#30
+	veor		d30,d19,d20
+	vsli.64		d26,d19,#25
+	veor		d18,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d21,d20		@ Maj(a,b,c)
+	veor		d18,d26			@ Sigma0(a)
+	vadd.i64	d22,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d18,d30
+	vshr.u64	q12,q6,#19
+	vshr.u64	q13,q6,#61
+	 vadd.i64	d18,d30			@ h+=Maj from the past
+	vshr.u64	q15,q6,#6
+	vsli.64		q12,q6,#45
+	vext.8		q14,q7,q0,#8	@ X[i+1]
+	vsli.64		q13,q6,#3
+	veor		q15,q12
+	vshr.u64	q12,q14,#1
+	veor		q15,q13				@ sigma1(X[i+14])
+	vshr.u64	q13,q14,#8
+	vadd.i64	q7,q15
+	vshr.u64	q15,q14,#7
+	vsli.64		q12,q14,#63
+	vsli.64		q13,q14,#56
+	vext.8		q14,q3,q4,#8	@ X[i+9]
+	veor		q15,q12
+	vshr.u64	d24,d22,#14		@ from NEON_00_15
+	vadd.i64	q7,q14
+	vshr.u64	d25,d22,#18		@ from NEON_00_15
+	veor		q15,q13				@ sigma0(X[i+1])
+	vshr.u64	d26,d22,#41		@ from NEON_00_15
+	vadd.i64	q7,q15
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d22,#50
+	vsli.64		d25,d22,#46
+	vmov		d29,d22
+	vsli.64		d26,d22,#23
+#if 30<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d23,d16		@ Ch(e,f,g)
+	vshr.u64	d24,d18,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d17
+	vshr.u64	d25,d18,#34
+	vsli.64		d24,d18,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d18,#39
+	vadd.i64	d28,d14
+	vsli.64		d25,d18,#30
+	veor		d30,d18,d19
+	vsli.64		d26,d18,#25
+	veor		d17,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d20,d19		@ Maj(a,b,c)
+	veor		d17,d26			@ Sigma0(a)
+	vadd.i64	d21,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d17,d30
+	vshr.u64	d24,d21,#14	@ 31
+#if 31<16
+	vld1.64		{d15},[r1]!	@ handles unaligned
+#endif
+	vshr.u64	d25,d21,#18
+#if 31>0
+	 vadd.i64	d17,d30			@ h+=Maj from the past
+#endif
+	vshr.u64	d26,d21,#41
+	vld1.64		{d28},[r3,:64]!	@ K[i++]
+	vsli.64		d24,d21,#50
+	vsli.64		d25,d21,#46
+	vmov		d29,d21
+	vsli.64		d26,d21,#23
+#if 31<16 && defined(__ARMEL__)
+	vrev64.8	,
+#endif
+	veor		d25,d24
+	vbsl		d29,d22,d23		@ Ch(e,f,g)
+	vshr.u64	d24,d17,#28
+	veor		d26,d25			@ Sigma1(e)
+	vadd.i64	d27,d29,d16
+	vshr.u64	d25,d17,#34
+	vsli.64		d24,d17,#36
+	vadd.i64	d27,d26
+	vshr.u64	d26,d17,#39
+	vadd.i64	d28,d15
+	vsli.64		d25,d17,#30
+	veor		d30,d17,d18
+	vsli.64		d26,d17,#25
+	veor		d16,d24,d25
+	vadd.i64	d27,d28
+	vbsl		d30,d19,d18		@ Maj(a,b,c)
+	veor		d16,d26			@ Sigma0(a)
+	vadd.i64	d20,d27
+	vadd.i64	d30,d27
+	@ vadd.i64	d16,d30
+	bne		.L16_79_neon
+
+	 vadd.i64	d16,d30		@ h+=Maj from the past
+	vldmia		r0,{d24-d31}	@ load context to temp
+	vadd.i64	q8,q12		@ vectorized accumulate
+	vadd.i64	q9,q13
+	vadd.i64	q10,q14
+	vadd.i64	q11,q15
+	vstmia		r0,{d16-d23}	@ save context
+	teq		r1,r2
+	sub		r3,#640	@ rewind K512
+	bne		.Loop_neon
+
+	VFP_ABI_POP
+	bx	lr				@ .word	0xe12fff1e
+.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
+#endif
+.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm	OPENSSL_armcap_P,4,4
+#endif
diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c
new file mode 100644
index 0000000..269a394
--- /dev/null
+++ b/arch/arm/crypto/sha512-glue.c
@@ -0,0 +1,121 @@
+/*
+ * sha512-glue.c - accelerated SHA-384/512 for ARM
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_DESCRIPTION("Accelerated SHA-384/SHA-512 secure hash for ARM");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+MODULE_ALIAS_CRYPTO("sha384-arm");
+MODULE_ALIAS_CRYPTO("sha512-arm");
+
+asmlinkage void sha512_block_data_order(u64 *state, u8 const *src, int blocks);
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+		      unsigned int len)
+{
+	return sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order);
+}
+
+int sha512_arm_final(struct shash_desc *desc, u8 *out)
+{
+	sha512_base_do_finalize(desc,
+		(sha512_block_fn *)sha512_block_data_order);
+	return sha512_base_finish(desc, out);
+}
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+		     unsigned int len, u8 *out)
+{
+	sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order);
+	return sha512_arm_final(desc, out);
+}
+
+static struct shash_alg sha512_arm_algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_arm_update,
+	.final			= sha512_arm_final,
+	.finup			= sha512_arm_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha384",
+		.cra_driver_name	= "sha384-arm",
+		.cra_priority		= 250,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+},  {
+	.init			= sha512_base_init,
+	.update			= sha512_arm_update,
+	.final			= sha512_arm_final,
+	.finup			= sha512_arm_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha512",
+		.cra_driver_name	= "sha512-arm",
+		.cra_priority		= 250,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
+
+static int __init sha512_arm_mod_init(void)
+{
+	int err;
+
+	err = crypto_register_shashes(sha512_arm_algs,
+				      ARRAY_SIZE(sha512_arm_algs));
+	if (err)
+		return err;
+
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+		err = crypto_register_shashes(sha512_neon_algs,
+					      ARRAY_SIZE(sha512_neon_algs));
+		if (err)
+			goto err_unregister;
+	}
+	return 0;
+
+err_unregister:
+	crypto_unregister_shashes(sha512_arm_algs,
+				  ARRAY_SIZE(sha512_arm_algs));
+
+	return err;
+}
+
+static void __exit sha512_arm_mod_fini(void)
+{
+	crypto_unregister_shashes(sha512_arm_algs,
+				  ARRAY_SIZE(sha512_arm_algs));
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+		crypto_unregister_shashes(sha512_neon_algs,
+					  ARRAY_SIZE(sha512_neon_algs));
+}
+
+module_init(sha512_arm_mod_init);
+module_exit(sha512_arm_mod_fini);
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
new file mode 100644
index 0000000..3269368
--- /dev/null
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -0,0 +1,98 @@
+/*
+ * sha512-neon-glue.c - accelerated SHA-384/512 for ARM NEON
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha512.h"
+
+MODULE_ALIAS_CRYPTO("sha384-neon");
+MODULE_ALIAS_CRYPTO("sha512-neon");
+
+asmlinkage void sha512_block_data_order_neon(u64 *state, u8 const *src,
+					     int blocks);
+
+static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int len)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+
+	if (!may_use_simd() ||
+	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+		return sha512_arm_update(desc, data, len);
+
+	kernel_neon_begin();
+	sha512_base_do_update(desc, data, len,
+		(sha512_block_fn *)sha512_block_data_order_neon);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
+			     unsigned int len, u8 *out)
+{
+	if (!may_use_simd())
+		return sha512_arm_finup(desc, data, len, out);
+
+	kernel_neon_begin();
+	if (len)
+		sha512_base_do_update(desc, data, len,
+			(sha512_block_fn *)sha512_block_data_order_neon);
+	sha512_base_do_finalize(desc,
+		(sha512_block_fn *)sha512_block_data_order_neon);
+	kernel_neon_end();
+
+	return sha512_base_finish(desc, out);
+}
+
+static int sha512_neon_final(struct shash_desc *desc, u8 *out)
+{
+	return sha512_neon_finup(desc, NULL, 0, out);
+}
+
+struct shash_alg sha512_neon_algs[] = { {
+	.init			= sha384_base_init,
+	.update			= sha512_neon_update,
+	.final			= sha512_neon_final,
+	.finup			= sha512_neon_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA384_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha384",
+		.cra_driver_name	= "sha384-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA384_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+
+	}
+},  {
+	.init			= sha512_base_init,
+	.update			= sha512_neon_update,
+	.final			= sha512_neon_final,
+	.finup			= sha512_neon_finup,
+	.descsize		= sizeof(struct sha512_state),
+	.digestsize		= SHA512_DIGEST_SIZE,
+	.base			= {
+		.cra_name		= "sha512",
+		.cra_driver_name	= "sha512-neon",
+		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= SHA512_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+} };
diff --git a/arch/arm/crypto/sha512.h b/arch/arm/crypto/sha512.h
new file mode 100644
index 0000000..a75d9a8
--- /dev/null
+++ b/arch/arm/crypto/sha512.h
@@ -0,0 +1,8 @@
+
+int sha512_arm_update(struct shash_desc *desc, const u8 *data,
+		      unsigned int len);
+
+int sha512_arm_finup(struct shash_desc *desc, const u8 *data,
+		     unsigned int len, u8 *out);
+
+extern struct shash_alg sha512_neon_algs[2];
diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c
deleted file mode 100644
index b124dce..0000000
--- a/arch/arm/crypto/sha512_neon_glue.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- * Glue code for the SHA512 Secure Hash Algorithm assembly implementation
- * using NEON instructions.
- *
- * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * This file is based on sha512_ssse3_glue.c:
- *   Copyright (C) 2013 Intel Corporation
- *   Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-#include <asm/simd.h>
-#include <asm/neon.h>
-
-
-static const u64 sha512_k[] = {
-	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
-	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
-	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
-	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
-	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
-	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
-	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
-	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
-	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
-	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
-	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
-	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
-	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
-	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
-	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
-	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
-	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
-	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
-	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
-	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
-	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
-	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
-	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
-	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
-	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
-	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
-	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
-	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
-	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
-	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
-	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
-	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
-	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
-	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
-	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
-	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
-	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
-	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
-	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
-	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
-};
-
-
-asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
-				      const u64 k[], unsigned int num_blks);
-
-
-static int sha512_neon_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA512_H0;
-	sctx->state[1] = SHA512_H1;
-	sctx->state[2] = SHA512_H2;
-	sctx->state[3] = SHA512_H3;
-	sctx->state[4] = SHA512_H4;
-	sctx->state[5] = SHA512_H5;
-	sctx->state[6] = SHA512_H6;
-	sctx->state[7] = SHA512_H7;
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
-
-static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
-				unsigned int len, unsigned int partial)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int done = 0;
-
-	sctx->count[0] += len;
-	if (sctx->count[0] < len)
-		sctx->count[1]++;
-
-	if (partial) {
-		done = SHA512_BLOCK_SIZE - partial;
-		memcpy(sctx->buf + partial, data, done);
-		sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
-	}
-
-	if (len - done >= SHA512_BLOCK_SIZE) {
-		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
-
-		sha512_transform_neon(sctx->state, data + done, sha512_k,
-				      rounds);
-
-		done += rounds * SHA512_BLOCK_SIZE;
-	}
-
-	memcpy(sctx->buf, data + done, len - done);
-
-	return 0;
-}
-
-static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
-			     unsigned int len)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
-	int res;
-
-	/* Handle the fast case right here */
-	if (partial + len < SHA512_BLOCK_SIZE) {
-		sctx->count[0] += len;
-		if (sctx->count[0] < len)
-			sctx->count[1]++;
-		memcpy(sctx->buf + partial, data, len);
-
-		return 0;
-	}
-
-	if (!may_use_simd()) {
-		res = crypto_sha512_update(desc, data, len);
-	} else {
-		kernel_neon_begin();
-		res = __sha512_neon_update(desc, data, len, partial);
-		kernel_neon_end();
-	}
-
-	return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha512_neon_final(struct shash_desc *desc, u8 *out)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-	unsigned int i, index, padlen;
-	__be64 *dst = (__be64 *)out;
-	__be64 bits[2];
-	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
-	/* save number of bits */
-	bits[1] = cpu_to_be64(sctx->count[0] << 3);
-	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
-	/* Pad out to 112 mod 128 and append length */
-	index = sctx->count[0] & 0x7f;
-	padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
-	if (!may_use_simd()) {
-		crypto_sha512_update(desc, padding, padlen);
-		crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
-	} else {
-		kernel_neon_begin();
-		/* We need to fill a whole block for __sha512_neon_update() */
-		if (padlen <= 112) {
-			sctx->count[0] += padlen;
-			if (sctx->count[0] < padlen)
-				sctx->count[1]++;
-			memcpy(sctx->buf + index, padding, padlen);
-		} else {
-			__sha512_neon_update(desc, padding, padlen, index);
-		}
-		__sha512_neon_update(desc, (const u8 *)&bits,
-					sizeof(bits), 112);
-		kernel_neon_end();
-	}
-
-	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be64(sctx->state[i]);
-
-	/* Wipe context */
-	memset(sctx, 0, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_neon_export(struct shash_desc *desc, void *out)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_neon_import(struct shash_desc *desc, const void *in)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha384_neon_init(struct shash_desc *desc)
-{
-	struct sha512_state *sctx = shash_desc_ctx(desc);
-
-	sctx->state[0] = SHA384_H0;
-	sctx->state[1] = SHA384_H1;
-	sctx->state[2] = SHA384_H2;
-	sctx->state[3] = SHA384_H3;
-	sctx->state[4] = SHA384_H4;
-	sctx->state[5] = SHA384_H5;
-	sctx->state[6] = SHA384_H6;
-	sctx->state[7] = SHA384_H7;
-
-	sctx->count[0] = sctx->count[1] = 0;
-
-	return 0;
-}
-
-static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
-{
-	u8 D[SHA512_DIGEST_SIZE];
-
-	sha512_neon_final(desc, D);
-
-	memcpy(hash, D, SHA384_DIGEST_SIZE);
-	memzero_explicit(D, SHA512_DIGEST_SIZE);
-
-	return 0;
-}
-
-static struct shash_alg algs[] = { {
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.init		=	sha512_neon_init,
-	.update		=	sha512_neon_update,
-	.final		=	sha512_neon_final,
-	.export		=	sha512_neon_export,
-	.import		=	sha512_neon_import,
-	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
-	.base		=	{
-		.cra_name	=	"sha512",
-		.cra_driver_name =	"sha512-neon",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA512_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-},  {
-	.digestsize	=	SHA384_DIGEST_SIZE,
-	.init		=	sha384_neon_init,
-	.update		=	sha512_neon_update,
-	.final		=	sha384_neon_final,
-	.export		=	sha512_neon_export,
-	.import		=	sha512_neon_import,
-	.descsize	=	sizeof(struct sha512_state),
-	.statesize	=	sizeof(struct sha512_state),
-	.base		=	{
-		.cra_name	=	"sha384",
-		.cra_driver_name =	"sha384-neon",
-		.cra_priority	=	250,
-		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
-		.cra_blocksize	=	SHA384_BLOCK_SIZE,
-		.cra_module	=	THIS_MODULE,
-	}
-} };
-
-static int __init sha512_neon_mod_init(void)
-{
-	if (!cpu_has_neon())
-		return -ENODEV;
-
-	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit sha512_neon_mod_fini(void)
-{
-	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
-}
-
-module_init(sha512_neon_mod_init);
-module_exit(sha512_neon_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
-
-MODULE_ALIAS_CRYPTO("sha512");
-MODULE_ALIAS_CRYPTO("sha384");
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 6c348df..3303e8a 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -13,7 +13,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
-#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index 12dccdb..af4c712 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -69,10 +69,10 @@ static int octeon_md5_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(0x67452301);
-	mctx->hash[1] = cpu_to_le32(0xefcdab89);
-	mctx->hash[2] = cpu_to_le32(0x98badcfe);
-	mctx->hash[3] = cpu_to_le32(0x10325476);
+	mctx->hash[0] = cpu_to_le32(MD5_H0);
+	mctx->hash[1] = cpu_to_le32(MD5_H1);
+	mctx->hash[2] = cpu_to_le32(MD5_H2);
+	mctx->hash[3] = cpu_to_le32(MD5_H3);
 	mctx->byte_count = 0;
 
 	return 0;
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
index 452fb4d..9228967 100644
--- a/arch/powerpc/crypto/md5-glue.c
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -37,10 +37,10 @@ static int ppc_md5_init(struct shash_desc *desc)
 {
 	struct md5_state *sctx = shash_desc_ctx(desc);
 
-	sctx->hash[0] = 0x67452301;
-	sctx->hash[1] = 0xefcdab89;
-	sctx->hash[2] = 0x98badcfe;
-	sctx->hash[3] =	0x10325476;
+	sctx->hash[0] = MD5_H0;
+	sctx->hash[1] = MD5_H1;
+	sctx->hash[2] = MD5_H2;
+	sctx->hash[3] =	MD5_H3;
 	sctx->byte_count = 0;
 
 	return 0;
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
new file mode 100644
index 0000000..9f8402b
--- /dev/null
+++ b/arch/powerpc/include/asm/icswx.h
@@ -0,0 +1,184 @@
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction.  This instruction is used to communicate with PowerPC
+ * coprocessors.  This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include <asm/ppc-opcode.h> /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE		(0x3fffffffffffffff)
+#define CCB_ADDRESS		(0xfffffffffffffff8)
+#define CCB_CM			(0x0000000000000007)
+#define CCB_CM0			(0x0000000000000004)
+#define CCB_CM12		(0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS	(0x0)
+#define CCB_CM0_LAST_IN_CHAIN	(0x4)
+#define CCB_CM12_STORE		(0x0)
+#define CCB_CM12_INTERRUPT	(0x1)
+
+#define CCB_SIZE		(0x10)
+#define CCB_ALIGN		CCB_SIZE
+
+struct coprocessor_completion_block {
+	__be64 value;
+	__be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V			(0x80)
+#define CSB_F			(0x04)
+#define CSB_CH			(0x03)
+#define CSB_CE_INCOMPLETE	(0x80)
+#define CSB_CE_TERMINATION	(0x40)
+#define CSB_CE_TPBC		(0x20)
+
+#define CSB_CC_SUCCESS		(0)
+#define CSB_CC_INVALID_ALIGN	(1)
+#define CSB_CC_OPERAND_OVERLAP	(2)
+#define CSB_CC_DATA_LENGTH	(3)
+#define CSB_CC_TRANSLATION	(5)
+#define CSB_CC_PROTECTION	(6)
+#define CSB_CC_RD_EXTERNAL	(7)
+#define CSB_CC_INVALID_OPERAND	(8)
+#define CSB_CC_PRIVILEGE	(9)
+#define CSB_CC_INTERNAL		(10)
+#define CSB_CC_WR_EXTERNAL	(12)
+#define CSB_CC_NOSPC		(13)
+#define CSB_CC_EXCESSIVE_DDE	(14)
+#define CSB_CC_WR_TRANSLATION	(15)
+#define CSB_CC_WR_PROTECTION	(16)
+#define CSB_CC_UNKNOWN_CODE	(17)
+#define CSB_CC_ABORT		(18)
+#define CSB_CC_TRANSPORT	(20)
+#define CSB_CC_SEGMENTED_DDL	(31)
+#define CSB_CC_PROGRESS_POINT	(32)
+#define CSB_CC_DDE_OVERFLOW	(33)
+#define CSB_CC_SESSION		(34)
+#define CSB_CC_PROVISION	(36)
+#define CSB_CC_CHAIN		(37)
+#define CSB_CC_SEQUENCE		(38)
+#define CSB_CC_HW		(39)
+
+#define CSB_SIZE		(0x10)
+#define CSB_ALIGN		CSB_SIZE
+
+struct coprocessor_status_block {
+	u8 flags;
+	u8 cs;
+	u8 cc;
+	u8 ce;
+	__be32 count;
+	__be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P			(0x8000)
+
+#define DDE_SIZE		(0x10)
+#define DDE_ALIGN		DDE_SIZE
+
+struct data_descriptor_entry {
+	__be16 flags;
+	u8 count;
+	u8 index;
+	__be32 length;
+	__be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE		(0x80)
+#define CRB_ALIGN		(0x100) /* Errata: requires 256 alignment */
+
+/* Coprocessor Status Block field
+ *   ADDRESS	address of CSB
+ *   C		CCB is valid
+ *   AT		0 = addrs are virtual, 1 = addrs are phys
+ *   M		enable perf monitor
+ */
+#define CRB_CSB_ADDRESS		(0xfffffffffffffff0)
+#define CRB_CSB_C		(0x0000000000000008)
+#define CRB_CSB_AT		(0x0000000000000002)
+#define CRB_CSB_M		(0x0000000000000001)
+
+struct coprocessor_request_block {
+	__be32 ccw;
+	__be32 flags;
+	__be64 csb_addr;
+
+	struct data_descriptor_entry source;
+	struct data_descriptor_entry target;
+
+	struct coprocessor_completion_block ccb;
+
+	u8 reserved[48];
+
+	struct coprocessor_status_block csb;
+} __packed __aligned(CRB_ALIGN);
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS			(0xff000000)
+#define CCW_CT			(0x00ff0000)
+#define CCW_CD			(0x0000ffff)
+#define CCW_CL			(0x0000c000)
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX)
+ * Chapter 8.2.4.1 Condition Register 0
+ */
+
+#define ICSWX_INITIATED		(0x8)
+#define ICSWX_BUSY		(0x4)
+#define ICSWX_REJECTED		(0x2)
+
+static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
+{
+	__be64 ccw_reg = ccw;
+	u32 cr;
+
+	__asm__ __volatile__(
+	PPC_ICSWX(%1,0,%2) "\n"
+	"mfcr %0\n"
+	: "=r" (cr)
+	: "r" (ccw_reg), "r" (crb)
+	: "cr0", "memory");
+
+	return (int)((cr >> 28) & 0xf);
+}
+
+
+#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 5c93f69..8452335 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -136,6 +136,8 @@
 #define PPC_INST_DCBAL			0x7c2005ec
 #define PPC_INST_DCBZL			0x7c2007ec
 #define PPC_INST_ICBT			0x7c00002c
+#define PPC_INST_ICSWX			0x7c00032d
+#define PPC_INST_ICSWEPX		0x7c00076d
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
 #define PPC_INST_LDARX			0x7c0000a8
@@ -403,4 +405,15 @@
 #define MFTMR(tmr, r)		stringify_in_c(.long PPC_INST_MFTMR | \
 					       TMRN(tmr) | ___PPC_RT(r))
 
+/* Coprocessor instructions */
+#define PPC_ICSWX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWX |	\
+					       ___PPC_RS(s) |		\
+					       ___PPC_RA(a) |		\
+					       ___PPC_RB(b))
+#define PPC_ICSWEPX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWEPX | \
+					       ___PPC_RS(s) |		\
+					       ___PPC_RA(a) |		\
+					       ___PPC_RB(b))
+
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 308c5e1..ea2cea7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -800,6 +800,7 @@ int of_get_ibm_chip_id(struct device_node *np)
 	}
 	return -1;
 }
+EXPORT_SYMBOL(of_get_ibm_chip_id);
 
 /**
  * cpu_to_chip_id - Return the cpus chip-id
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index b688731..c9d2b92 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -33,10 +33,10 @@ static int md5_sparc64_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = cpu_to_le32(0x67452301);
-	mctx->hash[1] = cpu_to_le32(0xefcdab89);
-	mctx->hash[2] = cpu_to_le32(0x98badcfe);
-	mctx->hash[3] = cpu_to_le32(0x10325476);
+	mctx->hash[0] = cpu_to_le32(MD5_H0);
+	mctx->hash[1] = cpu_to_le32(MD5_H1);
+	mctx->hash[2] = cpu_to_le32(MD5_H2);
+	mctx->hash[3] = cpu_to_le32(MD5_H3);
 	mctx->byte_count = 0;
 
 	return 0;
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 112cefa..2ade2400 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -807,8 +807,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
 	child_ctx = aesni_rfc4106_gcm_ctx_get(cryptd_child);
 	memcpy(child_ctx, ctx, sizeof(*ctx));
 	ctx->cryptd_tfm = cryptd_tfm;
-	tfm->crt_aead.reqsize = sizeof(struct aead_request)
-		+ crypto_aead_reqsize(&cryptd_tfm->base);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct aead_request) +
+		crypto_aead_reqsize(&cryptd_tfm->base));
 	return 0;
 }
 
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index e510b1c..0cb5149 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -885,7 +885,8 @@ static int __init sha1_mb_mod_init(void)
 		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
 		cpu_state->cpu = cpu;
 		cpu_state->alg_state = &sha1_mb_alg_state;
-		cpu_state->mgr = (struct sha1_ctx_mgr *) kzalloc(sizeof(struct sha1_ctx_mgr), GFP_KERNEL);
+		cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr),
+					GFP_KERNEL);
 		if (!cpu_state->mgr)
 			goto err2;
 		sha1_ctx_mgr_init(cpu_state->mgr);
diff --git a/crypto/842.c b/crypto/842.c
index b48f4f1..98e387e 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -1,5 +1,5 @@
 /*
- * Cryptographic API for the 842 compression algorithm.
+ * Cryptographic API for the 842 software compression algorithm.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -11,173 +11,73 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * Copyright (C) IBM Corporation, 2011-2015
  *
- * Copyright (C) IBM Corporation, 2011
+ * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
  *
- * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
- *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ * Rewrite: Dan Streetman <ddstreet@ieee.org>
+ *
+ * This is the software implementation of compression and decompression using
+ * the 842 format.  This uses the software 842 library at lib/842/ which is
+ * only a reference implementation, and is very, very slow as compared to other
+ * software compressors.  You probably do not want to use this software
+ * compression.  If you have access to the PowerPC 842 compression hardware, you
+ * want to use the 842 hardware compression interface, which is at:
+ * drivers/crypto/nx/nx-842-crypto.c
  */
 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/crypto.h>
-#include <linux/vmalloc.h>
-#include <linux/nx842.h>
-#include <linux/lzo.h>
-#include <linux/timer.h>
-
-static int nx842_uselzo;
-
-struct nx842_ctx {
-	void *nx842_wmem; /* working memory for 842/lzo */
-};
+#include <linux/sw842.h>
 
-enum nx842_crypto_type {
-	NX842_CRYPTO_TYPE_842,
-	NX842_CRYPTO_TYPE_LZO
+struct crypto842_ctx {
+	char wmem[SW842_MEM_COMPRESS];	/* working memory for compress */
 };
 
-#define NX842_SENTINEL 0xdeadbeef
-
-struct nx842_crypto_header {
-	unsigned int sentinel; /* debug */
-	enum nx842_crypto_type type;
-};
-
-static int nx842_init(struct crypto_tfm *tfm)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	int wmemsize;
-
-	wmemsize = max_t(int, nx842_get_workmem_size(), LZO1X_MEM_COMPRESS);
-	ctx->nx842_wmem = kmalloc(wmemsize, GFP_NOFS);
-	if (!ctx->nx842_wmem)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void nx842_exit(struct crypto_tfm *tfm)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	kfree(ctx->nx842_wmem);
-}
-
-static void nx842_reset_uselzo(unsigned long data)
+static int crypto842_compress(struct crypto_tfm *tfm,
+			      const u8 *src, unsigned int slen,
+			      u8 *dst, unsigned int *dlen)
 {
-	nx842_uselzo = 0;
-}
-
-static DEFINE_TIMER(failover_timer, nx842_reset_uselzo, 0, 0);
-
-static int nx842_crypto_compress(struct crypto_tfm *tfm, const u8 *src,
-			    unsigned int slen, u8 *dst, unsigned int *dlen)
-{
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct nx842_crypto_header *hdr;
-	unsigned int tmp_len = *dlen;
-	size_t lzodlen; /* needed for lzo */
-	int err;
-
-	*dlen = 0;
-	hdr = (struct nx842_crypto_header *)dst;
-	hdr->sentinel = NX842_SENTINEL; /* debug */
-	dst += sizeof(struct nx842_crypto_header);
-	tmp_len -= sizeof(struct nx842_crypto_header);
-	lzodlen = tmp_len;
-
-	if (likely(!nx842_uselzo)) {
-		err = nx842_compress(src, slen, dst, &tmp_len, ctx->nx842_wmem);
-
-		if (likely(!err)) {
-			hdr->type = NX842_CRYPTO_TYPE_842;
-			*dlen = tmp_len + sizeof(struct nx842_crypto_header);
-			return 0;
-		}
-
-		/* hardware failed */
-		nx842_uselzo = 1;
+	struct crypto842_ctx *ctx = crypto_tfm_ctx(tfm);
 
-		/* set timer to check for hardware again in 1 second */
-		mod_timer(&failover_timer, jiffies + msecs_to_jiffies(1000));
-	}
-
-	/* no hardware, use lzo */
-	err = lzo1x_1_compress(src, slen, dst, &lzodlen, ctx->nx842_wmem);
-	if (err != LZO_E_OK)
-		return -EINVAL;
-
-	hdr->type = NX842_CRYPTO_TYPE_LZO;
-	*dlen = lzodlen + sizeof(struct nx842_crypto_header);
-	return 0;
+	return sw842_compress(src, slen, dst, dlen, ctx->wmem);
 }
 
-static int nx842_crypto_decompress(struct crypto_tfm *tfm, const u8 *src,
-			      unsigned int slen, u8 *dst, unsigned int *dlen)
+static int crypto842_decompress(struct crypto_tfm *tfm,
+				const u8 *src, unsigned int slen,
+				u8 *dst, unsigned int *dlen)
 {
-	struct nx842_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct nx842_crypto_header *hdr;
-	unsigned int tmp_len = *dlen;
-	size_t lzodlen; /* needed for lzo */
-	int err;
-
-	*dlen = 0;
-	hdr = (struct nx842_crypto_header *)src;
-
-	if (unlikely(hdr->sentinel != NX842_SENTINEL))
-		return -EINVAL;
-
-	src += sizeof(struct nx842_crypto_header);
-	slen -= sizeof(struct nx842_crypto_header);
-
-	if (likely(hdr->type == NX842_CRYPTO_TYPE_842)) {
-		err = nx842_decompress(src, slen, dst, &tmp_len,
-			ctx->nx842_wmem);
-		if (err)
-			return -EINVAL;
-		*dlen = tmp_len;
-	} else if (hdr->type == NX842_CRYPTO_TYPE_LZO) {
-		lzodlen = tmp_len;
-		err = lzo1x_decompress_safe(src, slen, dst, &lzodlen);
-		if (err != LZO_E_OK)
-			return -EINVAL;
-		*dlen = lzodlen;
-	} else
-		return -EINVAL;
-
-	return 0;
+	return sw842_decompress(src, slen, dst, dlen);
 }
 
 static struct crypto_alg alg = {
 	.cra_name		= "842",
+	.cra_driver_name	= "842-generic",
+	.cra_priority		= 100,
 	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
-	.cra_ctxsize		= sizeof(struct nx842_ctx),
+	.cra_ctxsize		= sizeof(struct crypto842_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_init		= nx842_init,
-	.cra_exit		= nx842_exit,
 	.cra_u			= { .compress = {
-	.coa_compress		= nx842_crypto_compress,
-	.coa_decompress		= nx842_crypto_decompress } }
+	.coa_compress		= crypto842_compress,
+	.coa_decompress		= crypto842_decompress } }
 };
 
-static int __init nx842_mod_init(void)
+static int __init crypto842_mod_init(void)
 {
-	del_timer(&failover_timer);
 	return crypto_register_alg(&alg);
 }
+module_init(crypto842_mod_init);
 
-static void __exit nx842_mod_exit(void)
+static void __exit crypto842_mod_exit(void)
 {
 	crypto_unregister_alg(&alg);
 }
-
-module_init(nx842_mod_init);
-module_exit(nx842_mod_exit);
+module_exit(crypto842_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("842 Compression Algorithm");
+MODULE_DESCRIPTION("842 Software Compression Algorithm");
 MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-generic");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 362905e..0ff4cd4 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -221,11 +221,22 @@ config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_NULL
 	select CRYPTO_RNG
 	help
 	  This IV generator generates an IV based on a sequence number by
 	  xoring it with a salt.  This algorithm is mainly useful for CTR
 
+config CRYPTO_ECHAINIV
+	tristate "Encrypted Chain IV Generator"
+	select CRYPTO_AEAD
+	select CRYPTO_NULL
+	select CRYPTO_RNG
+	help
+	  This IV generator generates an IV based on the encryption of
+	  a sequence number xored with a salt.  This is the default
+	  algorithm for CBC.
+
 comment "Block modes"
 
 config CRYPTO_CBC
@@ -1412,10 +1423,9 @@ config CRYPTO_LZO
 
 config CRYPTO_842
 	tristate "842 compression algorithm"
-	depends on CRYPTO_DEV_NX_COMPRESS
-	# 842 uses lzo if the hardware becomes unavailable
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
+	select CRYPTO_ALGAPI
+	select 842_COMPRESS
+	select 842_DECOMPRESS
 	help
 	  This is the 842 algorithm.
 
@@ -1479,9 +1489,19 @@ config CRYPTO_DRBG
 	tristate
 	default CRYPTO_DRBG_MENU if (CRYPTO_DRBG_HMAC || CRYPTO_DRBG_HASH || CRYPTO_DRBG_CTR)
 	select CRYPTO_RNG
+	select CRYPTO_JITTERENTROPY
 
 endif	# if CRYPTO_DRBG_MENU
 
+config CRYPTO_JITTERENTROPY
+	tristate "Jitterentropy Non-Deterministic Random Number Generator"
+	help
+	  The Jitterentropy RNG is a noise that is intended
+	  to provide seed to another RNG. The RNG does not
+	  perform any cryptographic whitening of the generated
+	  random numbers. This Jitterentropy RNG registers with
+	  the kernel crypto API and can be used by any caller.
+
 config CRYPTO_USER_API
 	tristate
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 97b7d3a..5db5b95 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
 obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o
 obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o
 obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
+obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o
 
 crypto_hash-y += ahash.o
 crypto_hash-y += shash.o
@@ -94,6 +95,8 @@ obj-$(CONFIG_CRYPTO_RNG2) += rng.o
 obj-$(CONFIG_CRYPTO_RNG2) += krng.o
 obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o
 obj-$(CONFIG_CRYPTO_DRBG) += drbg.o
+CFLAGS_jitterentropy.o = -O0
+obj-$(CONFIG_CRYPTO_JITTERENTROPY) += jitterentropy.o
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
 obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
 obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index db201bca..b15d797 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -586,6 +586,13 @@ static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
 	if (!tmpl)
 		goto kill_larval;
 
+	if (tmpl->create) {
+		err = tmpl->create(tmpl, tb);
+		if (err)
+			goto put_tmpl;
+		goto ok;
+	}
+
 	inst = tmpl->alloc(tb);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
@@ -597,6 +604,7 @@ static int crypto_givcipher_default(struct crypto_alg *alg, u32 type, u32 mask)
 		goto put_tmpl;
 	}
 
+ok:
 	/* Redo the lookup to use the instance we just registered. */
 	err = -EAGAIN;
 
@@ -636,7 +644,7 @@ struct crypto_alg *crypto_lookup_skcipher(const char *name, u32 type, u32 mask)
 
 	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
 	    CRYPTO_ALG_TYPE_GIVCIPHER) {
-		if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) {
+		if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) {
 			crypto_mod_put(alg);
 			alg = ERR_PTR(-ENOENT);
 		}
diff --git a/crypto/aead.c b/crypto/aead.c
index 2222710..7c3d725 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -13,6 +13,7 @@
  */
 
 #include <crypto/internal/aead.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -26,10 +27,12 @@
 
 #include "internal.h"
 
+static int aead_null_givencrypt(struct aead_givcrypt_request *req);
+static int aead_null_givdecrypt(struct aead_givcrypt_request *req);
+
 static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
 			    unsigned int keylen)
 {
-	struct aead_alg *aead = crypto_aead_alg(tfm);
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
 	int ret;
 	u8 *buffer, *alignbuffer;
@@ -42,47 +45,94 @@ static int setkey_unaligned(struct crypto_aead *tfm, const u8 *key,
 
 	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
 	memcpy(alignbuffer, key, keylen);
-	ret = aead->setkey(tfm, alignbuffer, keylen);
+	ret = tfm->setkey(tfm, alignbuffer, keylen);
 	memset(alignbuffer, 0, keylen);
 	kfree(buffer);
 	return ret;
 }
 
-static int setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
+int crypto_aead_setkey(struct crypto_aead *tfm,
+		       const u8 *key, unsigned int keylen)
 {
-	struct aead_alg *aead = crypto_aead_alg(tfm);
 	unsigned long alignmask = crypto_aead_alignmask(tfm);
 
+	tfm = tfm->child;
+
 	if ((unsigned long)key & alignmask)
 		return setkey_unaligned(tfm, key, keylen);
 
-	return aead->setkey(tfm, key, keylen);
+	return tfm->setkey(tfm, key, keylen);
 }
+EXPORT_SYMBOL_GPL(crypto_aead_setkey);
 
 int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 {
-	struct aead_tfm *crt = crypto_aead_crt(tfm);
 	int err;
 
-	if (authsize > crypto_aead_alg(tfm)->maxauthsize)
+	if (authsize > crypto_aead_maxauthsize(tfm))
 		return -EINVAL;
 
-	if (crypto_aead_alg(tfm)->setauthsize) {
-		err = crypto_aead_alg(tfm)->setauthsize(crt->base, authsize);
+	if (tfm->setauthsize) {
+		err = tfm->setauthsize(tfm->child, authsize);
 		if (err)
 			return err;
 	}
 
-	crypto_aead_crt(crt->base)->authsize = authsize;
-	crt->authsize = authsize;
+	tfm->child->authsize = authsize;
+	tfm->authsize = authsize;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_aead_setauthsize);
 
-static unsigned int crypto_aead_ctxsize(struct crypto_alg *alg, u32 type,
-					u32 mask)
+struct aead_old_request {
+	struct scatterlist srcbuf[2];
+	struct scatterlist dstbuf[2];
+	struct aead_request subreq;
+};
+
+unsigned int crypto_aead_reqsize(struct crypto_aead *tfm)
+{
+	return tfm->reqsize + sizeof(struct aead_old_request);
+}
+EXPORT_SYMBOL_GPL(crypto_aead_reqsize);
+
+static int old_crypt(struct aead_request *req,
+		     int (*crypt)(struct aead_request *req))
 {
-	return alg->cra_ctxsize;
+	struct aead_old_request *nreq = aead_request_ctx(req);
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct scatterlist *src, *dst;
+
+	if (req->old)
+		return crypt(req);
+
+	src = scatterwalk_ffwd(nreq->srcbuf, req->src, req->assoclen);
+	dst = scatterwalk_ffwd(nreq->dstbuf, req->dst, req->assoclen);
+
+	aead_request_set_tfm(&nreq->subreq, aead);
+	aead_request_set_callback(&nreq->subreq, aead_request_flags(req),
+				  req->base.complete, req->base.data);
+	aead_request_set_crypt(&nreq->subreq, src, dst, req->cryptlen,
+			       req->iv);
+	aead_request_set_assoc(&nreq->subreq, req->src, req->assoclen);
+
+	return crypt(&nreq->subreq);
+}
+
+static int old_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct old_aead_alg *alg = crypto_old_aead_alg(aead);
+
+	return old_crypt(req, alg->encrypt);
+}
+
+static int old_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct old_aead_alg *alg = crypto_old_aead_alg(aead);
+
+	return old_crypt(req, alg->decrypt);
 }
 
 static int no_givcrypt(struct aead_givcrypt_request *req)
@@ -90,32 +140,54 @@ static int no_givcrypt(struct aead_givcrypt_request *req)
 	return -ENOSYS;
 }
 
-static int crypto_init_aead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+static int crypto_old_aead_init_tfm(struct crypto_tfm *tfm)
 {
-	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
-	struct aead_tfm *crt = &tfm->crt_aead;
+	struct old_aead_alg *alg = &tfm->__crt_alg->cra_aead;
+	struct crypto_aead *crt = __crypto_aead_cast(tfm);
 
 	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
 		return -EINVAL;
 
-	crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
-		      alg->setkey : setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-	crt->givencrypt = alg->givencrypt ?: no_givcrypt;
-	crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
-	crt->base = __crypto_aead_cast(tfm);
-	crt->ivsize = alg->ivsize;
+	crt->setkey = alg->setkey;
+	crt->setauthsize = alg->setauthsize;
+	crt->encrypt = old_encrypt;
+	crt->decrypt = old_decrypt;
+	if (alg->ivsize) {
+		crt->givencrypt = alg->givencrypt ?: no_givcrypt;
+		crt->givdecrypt = alg->givdecrypt ?: no_givcrypt;
+	} else {
+		crt->givencrypt = aead_null_givencrypt;
+		crt->givdecrypt = aead_null_givdecrypt;
+	}
+	crt->child = __crypto_aead_cast(tfm);
 	crt->authsize = alg->maxauthsize;
 
 	return 0;
 }
 
+static int crypto_aead_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *aead = __crypto_aead_cast(tfm);
+	struct aead_alg *alg = crypto_aead_alg(aead);
+
+	if (crypto_old_aead_alg(aead)->encrypt)
+		return crypto_old_aead_init_tfm(tfm);
+
+	aead->setkey = alg->setkey;
+	aead->setauthsize = alg->setauthsize;
+	aead->encrypt = alg->encrypt;
+	aead->decrypt = alg->decrypt;
+	aead->child = __crypto_aead_cast(tfm);
+	aead->authsize = alg->maxauthsize;
+
+	return 0;
+}
+
 #ifdef CONFIG_NET
-static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_aead raead;
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	strncpy(raead.type, "aead", sizeof(raead.type));
 	strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv));
@@ -133,6 +205,64 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 #else
+static int crypto_old_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	return -ENOSYS;
+}
+#endif
+
+static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg)
+	__attribute__ ((unused));
+static void crypto_old_aead_show(struct seq_file *m, struct crypto_alg *alg)
+{
+	struct old_aead_alg *aead = &alg->cra_aead;
+
+	seq_printf(m, "type         : aead\n");
+	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
+					     "yes" : "no");
+	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
+	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
+	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
+	seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
+}
+
+const struct crypto_type crypto_aead_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
+#ifdef CONFIG_PROC_FS
+	.show = crypto_old_aead_show,
+#endif
+	.report = crypto_old_aead_report,
+	.lookup = crypto_lookup_aead,
+	.maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV),
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
+};
+EXPORT_SYMBOL_GPL(crypto_aead_type);
+
+#ifdef CONFIG_NET
+static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_aead raead;
+	struct aead_alg *aead = container_of(alg, struct aead_alg, base);
+
+	strncpy(raead.type, "aead", sizeof(raead.type));
+	strncpy(raead.geniv, "<none>", sizeof(raead.geniv));
+
+	raead.blocksize = alg->cra_blocksize;
+	raead.maxauthsize = aead->maxauthsize;
+	raead.ivsize = aead->ivsize;
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_AEAD,
+		    sizeof(struct crypto_report_aead), &raead))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+#else
 static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	return -ENOSYS;
@@ -143,7 +273,7 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 	__attribute__ ((unused));
 static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 {
-	struct aead_alg *aead = &alg->cra_aead;
+	struct aead_alg *aead = container_of(alg, struct aead_alg, base);
 
 	seq_printf(m, "type         : aead\n");
 	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
@@ -151,18 +281,21 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
 	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
 	seq_printf(m, "ivsize       : %u\n", aead->ivsize);
 	seq_printf(m, "maxauthsize  : %u\n", aead->maxauthsize);
-	seq_printf(m, "geniv        : %s\n", aead->geniv ?: "<built-in>");
+	seq_printf(m, "geniv        : <none>\n");
 }
 
-const struct crypto_type crypto_aead_type = {
-	.ctxsize = crypto_aead_ctxsize,
-	.init = crypto_init_aead_ops,
+static const struct crypto_type crypto_new_aead_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_aead_show,
 #endif
 	.report = crypto_aead_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
 };
-EXPORT_SYMBOL_GPL(crypto_aead_type);
 
 static int aead_null_givencrypt(struct aead_givcrypt_request *req)
 {
@@ -174,33 +307,11 @@ static int aead_null_givdecrypt(struct aead_givcrypt_request *req)
 	return crypto_aead_decrypt(&req->areq);
 }
 
-static int crypto_init_nivaead_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
-{
-	struct aead_alg *alg = &tfm->__crt_alg->cra_aead;
-	struct aead_tfm *crt = &tfm->crt_aead;
-
-	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	crt->setkey = setkey;
-	crt->encrypt = alg->encrypt;
-	crt->decrypt = alg->decrypt;
-	if (!alg->ivsize) {
-		crt->givencrypt = aead_null_givencrypt;
-		crt->givdecrypt = aead_null_givdecrypt;
-	}
-	crt->base = __crypto_aead_cast(tfm);
-	crt->ivsize = alg->ivsize;
-	crt->authsize = alg->maxauthsize;
-
-	return 0;
-}
-
 #ifdef CONFIG_NET
 static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_aead raead;
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	strncpy(raead.type, "nivaead", sizeof(raead.type));
 	strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv));
@@ -229,7 +340,7 @@ static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 	__attribute__ ((unused));
 static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 {
-	struct aead_alg *aead = &alg->cra_aead;
+	struct old_aead_alg *aead = &alg->cra_aead;
 
 	seq_printf(m, "type         : nivaead\n");
 	seq_printf(m, "async        : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
@@ -241,43 +352,36 @@ static void crypto_nivaead_show(struct seq_file *m, struct crypto_alg *alg)
 }
 
 const struct crypto_type crypto_nivaead_type = {
-	.ctxsize = crypto_aead_ctxsize,
-	.init = crypto_init_nivaead_ops,
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_aead_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_nivaead_show,
 #endif
 	.report = crypto_nivaead_report,
+	.maskclear = ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV),
+	.maskset = CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV,
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.tfmsize = offsetof(struct crypto_aead, base),
 };
 EXPORT_SYMBOL_GPL(crypto_nivaead_type);
 
 static int crypto_grab_nivaead(struct crypto_aead_spawn *spawn,
 			       const char *name, u32 type, u32 mask)
 {
-	struct crypto_alg *alg;
-	int err;
-
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask |= CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV;
-
-	alg = crypto_alg_mod_lookup(name, type, mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
-
-	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
-	crypto_mod_put(alg);
-	return err;
+	spawn->base.frontend = &crypto_nivaead_type;
+	return crypto_grab_spawn(&spawn->base, name, type, mask);
 }
 
-struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-					 struct rtattr **tb, u32 type,
-					 u32 mask)
+struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+				       struct rtattr **tb, u32 type, u32 mask)
 {
 	const char *name;
 	struct crypto_aead_spawn *spawn;
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
-	struct crypto_alg *alg;
+	struct aead_instance *inst;
+	struct aead_alg *alg;
+	unsigned int ivsize;
+	unsigned int maxauthsize;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
@@ -296,20 +400,23 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 	if (!inst)
 		return ERR_PTR(-ENOMEM);
 
-	spawn = crypto_instance_ctx(inst);
+	spawn = aead_instance_ctx(inst);
 
 	/* Ignore async algorithms if necessary. */
 	mask |= crypto_requires_sync(algt->type, algt->mask);
 
-	crypto_set_aead_spawn(spawn, inst);
+	crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
 	err = crypto_grab_nivaead(spawn, name, type, mask);
 	if (err)
 		goto err_free_inst;
 
-	alg = crypto_aead_spawn_alg(spawn);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	ivsize = crypto_aead_alg_ivsize(alg);
+	maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
 	err = -EINVAL;
-	if (!alg->cra_aead.ivsize)
+	if (!ivsize)
 		goto err_drop_alg;
 
 	/*
@@ -318,39 +425,54 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
 	 * template name and double-check the IV generator.
 	 */
 	if (algt->mask & CRYPTO_ALG_GENIV) {
-		if (strcmp(tmpl->name, alg->cra_aead.geniv))
+		if (!alg->base.cra_aead.encrypt)
+			goto err_drop_alg;
+		if (strcmp(tmpl->name, alg->base.cra_aead.geniv))
 			goto err_drop_alg;
 
-		memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-		memcpy(inst->alg.cra_driver_name, alg->cra_driver_name,
+		memcpy(inst->alg.base.cra_name, alg->base.cra_name,
 		       CRYPTO_MAX_ALG_NAME);
-	} else {
-		err = -ENAMETOOLONG;
-		if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-			     "%s(%s)", tmpl->name, alg->cra_name) >=
-		    CRYPTO_MAX_ALG_NAME)
-			goto err_drop_alg;
-		if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-			     "%s(%s)", tmpl->name, alg->cra_driver_name) >=
-		    CRYPTO_MAX_ALG_NAME)
-			goto err_drop_alg;
+		memcpy(inst->alg.base.cra_driver_name,
+		       alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME);
+
+		inst->alg.base.cra_flags = CRYPTO_ALG_TYPE_AEAD |
+					   CRYPTO_ALG_GENIV;
+		inst->alg.base.cra_flags |= alg->base.cra_flags &
+					    CRYPTO_ALG_ASYNC;
+		inst->alg.base.cra_priority = alg->base.cra_priority;
+		inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+		inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
+		inst->alg.base.cra_type = &crypto_aead_type;
+
+		inst->alg.base.cra_aead.ivsize = ivsize;
+		inst->alg.base.cra_aead.maxauthsize = maxauthsize;
+
+		inst->alg.base.cra_aead.setkey = alg->base.cra_aead.setkey;
+		inst->alg.base.cra_aead.setauthsize =
+			alg->base.cra_aead.setauthsize;
+		inst->alg.base.cra_aead.encrypt = alg->base.cra_aead.encrypt;
+		inst->alg.base.cra_aead.decrypt = alg->base.cra_aead.decrypt;
+
+		goto out;
 	}
 
-	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV;
-	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
-	inst->alg.cra_priority = alg->cra_priority;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-	inst->alg.cra_type = &crypto_aead_type;
+	err = -ENAMETOOLONG;
+	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
+	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "%s(%s)", tmpl->name, alg->base.cra_driver_name) >=
+	    CRYPTO_MAX_ALG_NAME)
+		goto err_drop_alg;
 
-	inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
-	inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
-	inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+	inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+	inst->alg.base.cra_priority = alg->base.cra_priority;
+	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
+	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-	inst->alg.cra_aead.setkey = alg->cra_aead.setkey;
-	inst->alg.cra_aead.setauthsize = alg->cra_aead.setauthsize;
-	inst->alg.cra_aead.encrypt = alg->cra_aead.encrypt;
-	inst->alg.cra_aead.decrypt = alg->cra_aead.decrypt;
+	inst->alg.ivsize = ivsize;
+	inst->alg.maxauthsize = maxauthsize;
 
 out:
 	return inst;
@@ -364,9 +486,9 @@ err_free_inst:
 }
 EXPORT_SYMBOL_GPL(aead_geniv_alloc);
 
-void aead_geniv_free(struct crypto_instance *inst)
+void aead_geniv_free(struct aead_instance *inst)
 {
-	crypto_drop_aead(crypto_instance_ctx(inst));
+	crypto_drop_aead(aead_instance_ctx(inst));
 	kfree(inst);
 }
 EXPORT_SYMBOL_GPL(aead_geniv_free);
@@ -374,14 +496,17 @@ EXPORT_SYMBOL_GPL(aead_geniv_free);
 int aead_geniv_init(struct crypto_tfm *tfm)
 {
 	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_aead *child;
 	struct crypto_aead *aead;
 
-	aead = crypto_spawn_aead(crypto_instance_ctx(inst));
-	if (IS_ERR(aead))
-		return PTR_ERR(aead);
+	aead = __crypto_aead_cast(tfm);
+
+	child = crypto_spawn_aead(crypto_instance_ctx(inst));
+	if (IS_ERR(child))
+		return PTR_ERR(child);
 
-	tfm->crt_aead.base = aead;
-	tfm->crt_aead.reqsize += crypto_aead_reqsize(aead);
+	aead->child = child;
+	aead->reqsize += crypto_aead_reqsize(child);
 
 	return 0;
 }
@@ -389,7 +514,7 @@ EXPORT_SYMBOL_GPL(aead_geniv_init);
 
 void aead_geniv_exit(struct crypto_tfm *tfm)
 {
-	crypto_free_aead(tfm->crt_aead.base);
+	crypto_free_aead(__crypto_aead_cast(tfm)->child);
 }
 EXPORT_SYMBOL_GPL(aead_geniv_exit);
 
@@ -443,6 +568,13 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
 	if (!tmpl)
 		goto kill_larval;
 
+	if (tmpl->create) {
+		err = tmpl->create(tmpl, tb);
+		if (err)
+			goto put_tmpl;
+		goto ok;
+	}
+
 	inst = tmpl->alloc(tb);
 	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
@@ -454,6 +586,7 @@ static int crypto_nivaead_default(struct crypto_alg *alg, u32 type, u32 mask)
 		goto put_tmpl;
 	}
 
+ok:
 	/* Redo the lookup to use the instance we just registered. */
 	err = -EAGAIN;
 
@@ -489,7 +622,7 @@ struct crypto_alg *crypto_lookup_aead(const char *name, u32 type, u32 mask)
 		return alg;
 
 	if (alg->cra_type == &crypto_aead_type) {
-		if ((alg->cra_flags ^ type ^ ~mask) & CRYPTO_ALG_TESTED) {
+		if (~alg->cra_flags & (type ^ ~mask) & CRYPTO_ALG_TESTED) {
 			crypto_mod_put(alg);
 			alg = ERR_PTR(-ENOENT);
 		}
@@ -505,62 +638,62 @@ EXPORT_SYMBOL_GPL(crypto_lookup_aead);
 int crypto_grab_aead(struct crypto_aead_spawn *spawn, const char *name,
 		     u32 type, u32 mask)
 {
-	struct crypto_alg *alg;
-	int err;
+	spawn->base.frontend = &crypto_aead_type;
+	return crypto_grab_spawn(&spawn->base, name, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_grab_aead);
 
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	mask |= CRYPTO_ALG_TYPE_MASK;
+struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_aead_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_aead);
 
-	alg = crypto_lookup_aead(name, type, mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
+static int aead_prepare_alg(struct aead_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
 
-	err = crypto_init_spawn(&spawn->base, alg, spawn->base.inst, mask);
-	crypto_mod_put(alg);
-	return err;
+	if (max(alg->maxauthsize, alg->ivsize) > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	base->cra_type = &crypto_new_aead_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_AEAD;
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(crypto_grab_aead);
 
-struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask)
+int crypto_register_aead(struct aead_alg *alg)
 {
-	struct crypto_tfm *tfm;
+	struct crypto_alg *base = &alg->base;
 	int err;
 
-	type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	type |= CRYPTO_ALG_TYPE_AEAD;
-	mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV);
-	mask |= CRYPTO_ALG_TYPE_MASK;
-
-	for (;;) {
-		struct crypto_alg *alg;
+	err = aead_prepare_alg(alg);
+	if (err)
+		return err;
 
-		alg = crypto_lookup_aead(alg_name, type, mask);
-		if (IS_ERR(alg)) {
-			err = PTR_ERR(alg);
-			goto err;
-		}
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_aead);
 
-		tfm = __crypto_alloc_tfm(alg, type, mask);
-		if (!IS_ERR(tfm))
-			return __crypto_aead_cast(tfm);
+int crypto_unregister_aead(struct aead_alg *alg)
+{
+	return crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_aead);
 
-		crypto_mod_put(alg);
-		err = PTR_ERR(tfm);
+int aead_register_instance(struct crypto_template *tmpl,
+			   struct aead_instance *inst)
+{
+	int err;
 
-err:
-		if (err != -EAGAIN)
-			break;
-		if (signal_pending(current)) {
-			err = -EINTR;
-			break;
-		}
-	}
+	err = aead_prepare_alg(&inst->alg);
+	if (err)
+		return err;
 
-	return ERR_PTR(err);
+	return crypto_register_instance(tmpl, aead_crypto_instance(inst));
 }
-EXPORT_SYMBOL_GPL(crypto_alloc_aead);
+EXPORT_SYMBOL_GPL(aead_register_instance);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Authenticated Encryption with Associated Data (AEAD)");
diff --git a/crypto/algapi.c b/crypto/algapi.c
index d2627a3..abf100c 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -12,6 +12,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -43,12 +44,9 @@ static inline int crypto_set_driver_name(struct crypto_alg *alg)
 
 static inline void crypto_check_module_sig(struct module *mod)
 {
-#ifdef CONFIG_CRYPTO_FIPS
-	if (fips_enabled && mod && !mod->sig_ok)
+	if (fips_enabled && mod && !module_sig_ok(mod))
 		panic("Module %s signature verification failed in FIPS mode\n",
-		      mod->name);
-#endif
-	return;
+		      module_name(mod));
 }
 
 static int crypto_check_alg(struct crypto_alg *alg)
@@ -614,6 +612,22 @@ out:
 }
 EXPORT_SYMBOL_GPL(crypto_init_spawn2);
 
+int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name,
+		      u32 type, u32 mask)
+{
+	struct crypto_alg *alg;
+	int err;
+
+	alg = crypto_find_alg(name, spawn->frontend, type, mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	err = crypto_init_spawn(spawn, alg, spawn->inst, mask);
+	crypto_mod_put(alg);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_grab_spawn);
+
 void crypto_drop_spawn(struct crypto_spawn *spawn)
 {
 	if (!spawn->alg)
@@ -964,6 +978,12 @@ void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
 }
 EXPORT_SYMBOL_GPL(crypto_xor);
 
+unsigned int crypto_alg_extsize(struct crypto_alg *alg)
+{
+	return alg->cra_ctxsize;
+}
+EXPORT_SYMBOL_GPL(crypto_alg_extsize);
+
 static int __init crypto_algapi_init(void)
 {
 	crypto_init_proc();
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 69abada..a55e4e6 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -13,6 +13,7 @@
  * any later version.
  */
 
+#include <crypto/aead.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/if_alg.h>
 #include <linux/init.h>
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 8109aaa..150c2b6 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -164,7 +164,7 @@ static int rng_setkey(void *private, const u8 *seed, unsigned int seedlen)
 	 * Check whether seedlen is of sufficient size is done in RNG
 	 * implementations.
 	 */
-	return crypto_rng_reset(private, (u8 *)seed, seedlen);
+	return crypto_rng_reset(private, seed, seedlen);
 }
 
 static const struct af_alg_type algif_type_rng = {
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 765fe76..eff337c 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -20,8 +20,6 @@
 #include <linux/moduleparam.h>
 #include <linux/string.h>
 
-#include "internal.h"
-
 #define DEFAULT_PRNG_KEY "0123456789abcdef"
 #define DEFAULT_PRNG_KSZ 16
 #define DEFAULT_BLK_SZ 16
@@ -281,11 +279,11 @@ static void free_prng_context(struct prng_context *ctx)
 }
 
 static int reset_prng_context(struct prng_context *ctx,
-			      unsigned char *key, size_t klen,
-			      unsigned char *V, unsigned char *DT)
+			      const unsigned char *key, size_t klen,
+			      const unsigned char *V, const unsigned char *DT)
 {
 	int ret;
-	unsigned char *prng_key;
+	const unsigned char *prng_key;
 
 	spin_lock_bh(&ctx->prng_lock);
 	ctx->flags |= PRNG_NEED_RESET;
@@ -353,8 +351,9 @@ static void cprng_exit(struct crypto_tfm *tfm)
 	free_prng_context(crypto_tfm_ctx(tfm));
 }
 
-static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
-			    unsigned int dlen)
+static int cprng_get_random(struct crypto_rng *tfm,
+			    const u8 *src, unsigned int slen,
+			    u8 *rdata, unsigned int dlen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
 
@@ -367,11 +366,12 @@ static int cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
  *  V and KEY are required during reset, and DT is optional, detected
  *  as being present by testing the length of the seed
  */
-static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int cprng_reset(struct crypto_rng *tfm,
+		       const u8 *seed, unsigned int slen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
-	u8 *key = seed + DEFAULT_BLK_SZ;
-	u8 *dt = NULL;
+	const u8 *key = seed + DEFAULT_BLK_SZ;
+	const u8 *dt = NULL;
 
 	if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ)
 		return -EINVAL;
@@ -387,18 +387,20 @@ static int cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 }
 
 #ifdef CONFIG_CRYPTO_FIPS
-static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
-			    unsigned int dlen)
+static int fips_cprng_get_random(struct crypto_rng *tfm,
+				 const u8 *src, unsigned int slen,
+				 u8 *rdata, unsigned int dlen)
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
 
 	return get_prng_bytes(rdata, dlen, prng, 1);
 }
 
-static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int fips_cprng_reset(struct crypto_rng *tfm,
+			    const u8 *seed, unsigned int slen)
 {
 	u8 rdata[DEFAULT_BLK_SZ];
-	u8 *key = seed + DEFAULT_BLK_SZ;
+	const u8 *key = seed + DEFAULT_BLK_SZ;
 	int rc;
 
 	struct prng_context *prng = crypto_rng_ctx(tfm);
@@ -424,40 +426,32 @@ out:
 }
 #endif
 
-static struct crypto_alg rng_algs[] = { {
-	.cra_name		= "stdrng",
-	.cra_driver_name	= "ansi_cprng",
-	.cra_priority		= 100,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= sizeof(struct prng_context),
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= cprng_init,
-	.cra_exit		= cprng_exit,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= cprng_get_random,
-			.rng_reset		= cprng_reset,
-			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
-		}
+static struct rng_alg rng_algs[] = { {
+	.generate		= cprng_get_random,
+	.seed			= cprng_reset,
+	.seedsize		= DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ,
+	.base			=	{
+		.cra_name		= "stdrng",
+		.cra_driver_name	= "ansi_cprng",
+		.cra_priority		= 100,
+		.cra_ctxsize		= sizeof(struct prng_context),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= cprng_init,
+		.cra_exit		= cprng_exit,
 	}
 #ifdef CONFIG_CRYPTO_FIPS
 }, {
-	.cra_name		= "fips(ansi_cprng)",
-	.cra_driver_name	= "fips_ansi_cprng",
-	.cra_priority		= 300,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= sizeof(struct prng_context),
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_init		= cprng_init,
-	.cra_exit		= cprng_exit,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= fips_cprng_get_random,
-			.rng_reset		= fips_cprng_reset,
-			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
-		}
+	.generate		= fips_cprng_get_random,
+	.seed			= fips_cprng_reset,
+	.seedsize		= DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ,
+	.base			=	{
+		.cra_name		= "fips(ansi_cprng)",
+		.cra_driver_name	= "fips_ansi_cprng",
+		.cra_priority		= 300,
+		.cra_ctxsize		= sizeof(struct prng_context),
+		.cra_module		= THIS_MODULE,
+		.cra_init		= cprng_init,
+		.cra_exit		= cprng_exit,
 	}
 #endif
 } };
@@ -465,12 +459,12 @@ static struct crypto_alg rng_algs[] = { {
 /* Module initalization */
 static int __init prng_mod_init(void)
 {
-	return crypto_register_algs(rng_algs, ARRAY_SIZE(rng_algs));
+	return crypto_register_rngs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 static void __exit prng_mod_fini(void)
 {
-	crypto_unregister_algs(rng_algs, ARRAY_SIZE(rng_algs));
+	crypto_unregister_rngs(rng_algs, ARRAY_SIZE(rng_algs));
 }
 
 MODULE_LICENSE("GPL");
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 78fb16c..3e85229 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -10,7 +10,7 @@
  *
  */
 
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
@@ -570,13 +570,14 @@ static int crypto_authenc_init_tfm(struct crypto_tfm *tfm)
 			    crypto_ahash_alignmask(auth) + 1) +
 		      crypto_ablkcipher_ivsize(enc);
 
-	tfm->crt_aead.reqsize = sizeof(struct authenc_request_ctx) +
-				ctx->reqoff +
-				max_t(unsigned int,
-				crypto_ahash_reqsize(auth) +
-				sizeof(struct ahash_request),
-				sizeof(struct skcipher_givcrypt_request) +
-				crypto_ablkcipher_reqsize(enc));
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct authenc_request_ctx) +
+		ctx->reqoff +
+		max_t(unsigned int,
+			crypto_ahash_reqsize(auth) +
+			sizeof(struct ahash_request),
+			sizeof(struct skcipher_givcrypt_request) +
+			crypto_ablkcipher_reqsize(enc)));
 
 	return 0;
 
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 024bff2..a3da677 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -12,7 +12,7 @@
  *
  */
 
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/authenc.h>
@@ -662,13 +662,14 @@ static int crypto_authenc_esn_init_tfm(struct crypto_tfm *tfm)
 			    crypto_ahash_alignmask(auth) + 1) +
 		      crypto_ablkcipher_ivsize(enc);
 
-	tfm->crt_aead.reqsize = sizeof(struct authenc_esn_request_ctx) +
-				ctx->reqoff +
-				max_t(unsigned int,
-				crypto_ahash_reqsize(auth) +
-				sizeof(struct ahash_request),
-				sizeof(struct skcipher_givcrypt_request) +
-				crypto_ablkcipher_reqsize(enc));
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct authenc_esn_request_ctx) +
+		ctx->reqoff +
+		max_t(unsigned int,
+			crypto_ahash_reqsize(auth) +
+			sizeof(struct ahash_request),
+			sizeof(struct skcipher_givcrypt_request) +
+			crypto_ablkcipher_reqsize(enc)));
 
 	return 0;
 
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 0122bec..11b9814 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -14,6 +14,7 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/errno.h>
diff --git a/crypto/ccm.c b/crypto/ccm.c
index 003bbbd..a4d1a5e 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -453,9 +453,9 @@ static int crypto_ccm_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_tfm_alg_alignmask(tfm);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = align +
-				sizeof(struct crypto_ccm_req_priv_ctx) +
-				crypto_ablkcipher_reqsize(ctr);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		align + sizeof(struct crypto_ccm_req_priv_ctx) +
+		crypto_ablkcipher_reqsize(ctr));
 
 	return 0;
 
@@ -729,10 +729,10 @@ static int crypto_rfc4309_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct aead_request) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct aead_request) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 16);
 
 	return 0;
 }
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index b0602ba..4264c8d 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -295,6 +295,23 @@ static void cryptd_blkcipher_exit_tfm(struct crypto_tfm *tfm)
 	crypto_free_blkcipher(ctx->child);
 }
 
+static int cryptd_init_instance(struct crypto_instance *inst,
+				struct crypto_alg *alg)
+{
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "cryptd(%s)",
+		     alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+		return -ENAMETOOLONG;
+
+	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+
+	inst->alg.cra_priority = alg->cra_priority + 50;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+
+	return 0;
+}
+
 static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
 				   unsigned int tail)
 {
@@ -308,17 +325,10 @@ static void *cryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
 
 	inst = (void *)(p + head);
 
-	err = -ENAMETOOLONG;
-	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "cryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+	err = cryptd_init_instance(inst, alg);
+	if (err)
 		goto out_free_inst;
 
-	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
-
-	inst->alg.cra_priority = alg->cra_priority + 50;
-	inst->alg.cra_blocksize = alg->cra_blocksize;
-	inst->alg.cra_alignmask = alg->cra_alignmask;
-
 out:
 	return p;
 
@@ -729,7 +739,8 @@ static int cryptd_aead_init_tfm(struct crypto_tfm *tfm)
 
 	crypto_aead_set_flags(cipher, CRYPTO_TFM_REQ_MAY_SLEEP);
 	ctx->child = cipher;
-	tfm->crt_aead.reqsize = sizeof(struct cryptd_aead_request_ctx);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct cryptd_aead_request_ctx));
 	return 0;
 }
 
@@ -746,29 +757,34 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
 	struct aead_instance_ctx *ctx;
 	struct crypto_instance *inst;
 	struct crypto_alg *alg;
-	u32 type = CRYPTO_ALG_TYPE_AEAD;
-	u32 mask = CRYPTO_ALG_TYPE_MASK;
+	const char *name;
+	u32 type = 0;
+	u32 mask = 0;
 	int err;
 
 	cryptd_check_internal(tb, &type, &mask);
 
-	alg = crypto_get_attr_alg(tb, type, mask);
-        if (IS_ERR(alg))
-		return PTR_ERR(alg);
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return PTR_ERR(name);
 
-	inst = cryptd_alloc_instance(alg, 0, sizeof(*ctx));
-	err = PTR_ERR(inst);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
 
 	ctx = crypto_instance_ctx(inst);
 	ctx->queue = queue;
 
-	err = crypto_init_spawn(&ctx->aead_spawn.base, alg, inst,
-			CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
+	crypto_set_aead_spawn(&ctx->aead_spawn, inst);
+	err = crypto_grab_aead(&ctx->aead_spawn, name, type, mask);
 	if (err)
 		goto out_free_inst;
 
+	alg = crypto_aead_spawn_alg(&ctx->aead_spawn);
+	err = cryptd_init_instance(inst, alg);
+	if (err)
+		goto out_drop_aead;
+
 	type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
 	if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
 		type |= CRYPTO_ALG_INTERNAL;
@@ -789,12 +805,11 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
 
 	err = crypto_register_instance(tmpl, inst);
 	if (err) {
-		crypto_drop_spawn(&ctx->aead_spawn.base);
+out_drop_aead:
+		crypto_drop_aead(&ctx->aead_spawn);
 out_free_inst:
 		kfree(inst);
 	}
-out_put_alg:
-	crypto_mod_put(alg);
 	return err;
 }
 
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index a203191..941c9a4 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -25,6 +25,10 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 
+static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
+static struct crypto_blkcipher *crypto_default_null_skcipher;
+static int crypto_default_null_skcipher_refcnt;
+
 static int null_compress(struct crypto_tfm *tfm, const u8 *src,
 			 unsigned int slen, u8 *dst, unsigned int *dlen)
 {
@@ -149,6 +153,41 @@ MODULE_ALIAS_CRYPTO("compress_null");
 MODULE_ALIAS_CRYPTO("digest_null");
 MODULE_ALIAS_CRYPTO("cipher_null");
 
+struct crypto_blkcipher *crypto_get_default_null_skcipher(void)
+{
+	struct crypto_blkcipher *tfm;
+
+	mutex_lock(&crypto_default_null_skcipher_lock);
+	tfm = crypto_default_null_skcipher;
+
+	if (!tfm) {
+		tfm = crypto_alloc_blkcipher("ecb(cipher_null)", 0, 0);
+		if (IS_ERR(tfm))
+			goto unlock;
+
+		crypto_default_null_skcipher = tfm;
+	}
+
+	crypto_default_null_skcipher_refcnt++;
+
+unlock:
+	mutex_unlock(&crypto_default_null_skcipher_lock);
+
+	return tfm;
+}
+EXPORT_SYMBOL_GPL(crypto_get_default_null_skcipher);
+
+void crypto_put_default_null_skcipher(void)
+{
+	mutex_lock(&crypto_default_null_skcipher_lock);
+	if (!--crypto_default_null_skcipher_refcnt) {
+		crypto_free_blkcipher(crypto_default_null_skcipher);
+		crypto_default_null_skcipher = NULL;
+	}
+	mutex_unlock(&crypto_default_null_skcipher_lock);
+}
+EXPORT_SYMBOL_GPL(crypto_put_default_null_skcipher);
+
 static int __init crypto_null_mod_init(void)
 {
 	int ret = 0;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index b69409c..9284348 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -235,7 +235,7 @@ static bool drbg_fips_continuous_test(struct drbg_state *drbg,
 #ifdef CONFIG_CRYPTO_FIPS
 	int ret = 0;
 	/* skip test if we test the overall system */
-	if (drbg->test_data)
+	if (list_empty(&drbg->test_data.list))
 		return true;
 	/* only perform test in FIPS mode */
 	if (0 == fips_enabled)
@@ -487,7 +487,7 @@ static int drbg_ctr_df(struct drbg_state *drbg,
 
 out:
 	memset(iv, 0, drbg_blocklen(drbg));
-	memset(temp, 0, drbg_statelen(drbg));
+	memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg));
 	memset(pad, 0, drbg_blocklen(drbg));
 	return ret;
 }
@@ -1041,6 +1041,43 @@ static struct drbg_state_ops drbg_hash_ops = {
  * Functions common for DRBG implementations
  ******************************************************************/
 
+static inline int __drbg_seed(struct drbg_state *drbg, struct list_head *seed,
+			      int reseed)
+{
+	int ret = drbg->d_ops->update(drbg, seed, reseed);
+
+	if (ret)
+		return ret;
+
+	drbg->seeded = true;
+	/* 10.1.1.2 / 10.1.1.3 step 5 */
+	drbg->reseed_ctr = 1;
+
+	return ret;
+}
+
+static void drbg_async_seed(struct work_struct *work)
+{
+	struct drbg_string data;
+	LIST_HEAD(seedlist);
+	struct drbg_state *drbg = container_of(work, struct drbg_state,
+					       seed_work);
+	int ret;
+
+	get_blocking_random_bytes(drbg->seed_buf, drbg->seed_buf_len);
+
+	drbg_string_fill(&data, drbg->seed_buf, drbg->seed_buf_len);
+	list_add_tail(&data.list, &seedlist);
+	mutex_lock(&drbg->drbg_mutex);
+	ret = __drbg_seed(drbg, &seedlist, true);
+	if (!ret && drbg->jent) {
+		crypto_free_rng(drbg->jent);
+		drbg->jent = NULL;
+	}
+	memzero_explicit(drbg->seed_buf, drbg->seed_buf_len);
+	mutex_unlock(&drbg->drbg_mutex);
+}
+
 /*
  * Seeding or reseeding of the DRBG
  *
@@ -1056,8 +1093,6 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		     bool reseed)
 {
 	int ret = 0;
-	unsigned char *entropy = NULL;
-	size_t entropylen = 0;
 	struct drbg_string data1;
 	LIST_HEAD(seedlist);
 
@@ -1068,31 +1103,29 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		return -EINVAL;
 	}
 
-	if (drbg->test_data && drbg->test_data->testentropy) {
-		drbg_string_fill(&data1, drbg->test_data->testentropy->buf,
-				 drbg->test_data->testentropy->len);
+	if (list_empty(&drbg->test_data.list)) {
+		drbg_string_fill(&data1, drbg->test_data.buf,
+				 drbg->test_data.len);
 		pr_devel("DRBG: using test entropy\n");
 	} else {
-		/*
-		 * Gather entropy equal to the security strength of the DRBG.
-		 * With a derivation function, a nonce is required in addition
-		 * to the entropy. A nonce must be at least 1/2 of the security
-		 * strength of the DRBG in size. Thus, entropy * nonce is 3/2
-		 * of the strength. The consideration of a nonce is only
-		 * applicable during initial seeding.
-		 */
-		entropylen = drbg_sec_strength(drbg->core->flags);
-		if (!entropylen)
-			return -EFAULT;
-		if (!reseed)
-			entropylen = ((entropylen + 1) / 2) * 3;
-		pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n",
-			 entropylen);
-		entropy = kzalloc(entropylen, GFP_KERNEL);
-		if (!entropy)
-			return -ENOMEM;
-		get_random_bytes(entropy, entropylen);
-		drbg_string_fill(&data1, entropy, entropylen);
+		/* Get seed from in-kernel /dev/urandom */
+		get_random_bytes(drbg->seed_buf, drbg->seed_buf_len);
+
+		/* Get seed from Jitter RNG */
+		if (!drbg->jent ||
+		    crypto_rng_get_bytes(drbg->jent,
+					 drbg->seed_buf + drbg->seed_buf_len,
+					 drbg->seed_buf_len)) {
+			drbg_string_fill(&data1, drbg->seed_buf,
+					 drbg->seed_buf_len);
+			pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n",
+				 drbg->seed_buf_len);
+		} else {
+			drbg_string_fill(&data1, drbg->seed_buf,
+					 drbg->seed_buf_len * 2);
+			pr_devel("DRBG: (re)seeding with %zu bytes of entropy\n",
+				 drbg->seed_buf_len * 2);
+		}
 	}
 	list_add_tail(&data1.list, &seedlist);
 
@@ -1111,16 +1144,28 @@ static int drbg_seed(struct drbg_state *drbg, struct drbg_string *pers,
 		memset(drbg->C, 0, drbg_statelen(drbg));
 	}
 
-	ret = drbg->d_ops->update(drbg, &seedlist, reseed);
+	ret = __drbg_seed(drbg, &seedlist, reseed);
+
+	/*
+	 * Clear the initial entropy buffer as the async call may not overwrite
+	 * that buffer for quite some time.
+	 */
+	memzero_explicit(drbg->seed_buf, drbg->seed_buf_len * 2);
 	if (ret)
 		goto out;
+	/*
+	 * For all subsequent seeding calls, we only need the seed buffer
+	 * equal to the security strength of the DRBG. We undo the calculation
+	 * in drbg_alloc_state.
+	 */
+	if (!reseed)
+		drbg->seed_buf_len = drbg->seed_buf_len / 3 * 2;
 
-	drbg->seeded = true;
-	/* 10.1.1.2 / 10.1.1.3 step 5 */
-	drbg->reseed_ctr = 1;
+	/* Invoke asynchronous seeding unless DRBG is in test mode. */
+	if (!list_empty(&drbg->test_data.list) && !reseed)
+		schedule_work(&drbg->seed_work);
 
 out:
-	kzfree(entropy);
 	return ret;
 }
 
@@ -1136,11 +1181,19 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg)
 	kzfree(drbg->scratchpad);
 	drbg->scratchpad = NULL;
 	drbg->reseed_ctr = 0;
+	drbg->d_ops = NULL;
+	drbg->core = NULL;
 #ifdef CONFIG_CRYPTO_FIPS
 	kzfree(drbg->prev);
 	drbg->prev = NULL;
 	drbg->fips_primed = false;
 #endif
+	kzfree(drbg->seed_buf);
+	drbg->seed_buf = NULL;
+	if (drbg->jent) {
+		crypto_free_rng(drbg->jent);
+		drbg->jent = NULL;
+	}
 }
 
 /*
@@ -1152,6 +1205,27 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 	int ret = -ENOMEM;
 	unsigned int sb_size = 0;
 
+	switch (drbg->core->flags & DRBG_TYPE_MASK) {
+#ifdef CONFIG_CRYPTO_DRBG_HMAC
+	case DRBG_HMAC:
+		drbg->d_ops = &drbg_hmac_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_HMAC */
+#ifdef CONFIG_CRYPTO_DRBG_HASH
+	case DRBG_HASH:
+		drbg->d_ops = &drbg_hash_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_HASH */
+#ifdef CONFIG_CRYPTO_DRBG_CTR
+	case DRBG_CTR:
+		drbg->d_ops = &drbg_ctr_ops;
+		break;
+#endif /* CONFIG_CRYPTO_DRBG_CTR */
+	default:
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
 	drbg->V = kmalloc(drbg_statelen(drbg), GFP_KERNEL);
 	if (!drbg->V)
 		goto err;
@@ -1181,87 +1255,50 @@ static inline int drbg_alloc_state(struct drbg_state *drbg)
 		if (!drbg->scratchpad)
 			goto err;
 	}
-	spin_lock_init(&drbg->drbg_lock);
-	return 0;
-
-err:
-	drbg_dealloc_state(drbg);
-	return ret;
-}
 
-/*
- * Strategy to avoid holding long term locks: generate a shadow copy of DRBG
- * and perform all operations on this shadow copy. After finishing, restore
- * the updated state of the shadow copy into original drbg state. This way,
- * only the read and write operations of the original drbg state must be
- * locked
- */
-static inline void drbg_copy_drbg(struct drbg_state *src,
-				  struct drbg_state *dst)
-{
-	if (!src || !dst)
-		return;
-	memcpy(dst->V, src->V, drbg_statelen(src));
-	memcpy(dst->C, src->C, drbg_statelen(src));
-	dst->reseed_ctr = src->reseed_ctr;
-	dst->seeded = src->seeded;
-	dst->pr = src->pr;
-#ifdef CONFIG_CRYPTO_FIPS
-	dst->fips_primed = src->fips_primed;
-	memcpy(dst->prev, src->prev, drbg_blocklen(src));
-#endif
 	/*
-	 * Not copied:
-	 * scratchpad is initialized drbg_alloc_state;
-	 * priv_data is initialized with call to crypto_init;
-	 * d_ops and core are set outside, as these parameters are const;
-	 * test_data is set outside to prevent it being copied back.
+	 * Gather entropy equal to the security strength of the DRBG.
+	 * With a derivation function, a nonce is required in addition
+	 * to the entropy. A nonce must be at least 1/2 of the security
+	 * strength of the DRBG in size. Thus, entropy * nonce is 3/2
+	 * of the strength. The consideration of a nonce is only
+	 * applicable during initial seeding.
 	 */
-}
-
-static int drbg_make_shadow(struct drbg_state *drbg, struct drbg_state **shadow)
-{
-	int ret = -ENOMEM;
-	struct drbg_state *tmp = NULL;
-
-	tmp = kzalloc(sizeof(struct drbg_state), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
+	drbg->seed_buf_len = drbg_sec_strength(drbg->core->flags);
+	if (!drbg->seed_buf_len) {
+		ret = -EFAULT;
+		goto err;
+	}
+	/*
+	 * Ensure we have sufficient buffer space for initial seed which
+	 * consists of the seed from get_random_bytes and the Jitter RNG.
+	 */
+	drbg->seed_buf_len = ((drbg->seed_buf_len + 1) / 2) * 3;
+	drbg->seed_buf = kzalloc(drbg->seed_buf_len * 2, GFP_KERNEL);
+	if (!drbg->seed_buf)
+		goto err;
 
-	/* read-only data as they are defined as const, no lock needed */
-	tmp->core = drbg->core;
-	tmp->d_ops = drbg->d_ops;
+	INIT_WORK(&drbg->seed_work, drbg_async_seed);
 
-	ret = drbg_alloc_state(tmp);
-	if (ret)
-		goto err;
+	drbg->jent = crypto_alloc_rng("jitterentropy_rng", 0, 0);
+	if(IS_ERR(drbg->jent))
+	{
+		pr_info("DRBG: could not allocate Jitter RNG handle for seeding\n");
+		/*
+		 * As the Jitter RNG is a module that may not be present, we
+		 * continue with the operation and do not fully tie the DRBG
+		 * to the Jitter RNG.
+		 */
+		drbg->jent = NULL;
+	}
 
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg_copy_drbg(drbg, tmp);
-	/* only make a link to the test buffer, as we only read that data */
-	tmp->test_data = drbg->test_data;
-	spin_unlock_bh(&drbg->drbg_lock);
-	*shadow = tmp;
 	return 0;
 
 err:
-	kzfree(tmp);
+	drbg_dealloc_state(drbg);
 	return ret;
 }
 
-static void drbg_restore_shadow(struct drbg_state *drbg,
-				struct drbg_state **shadow)
-{
-	struct drbg_state *tmp = *shadow;
-
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg_copy_drbg(tmp, drbg);
-	spin_unlock_bh(&drbg->drbg_lock);
-	drbg_dealloc_state(tmp);
-	kzfree(tmp);
-	*shadow = NULL;
-}
-
 /*************************************************************************
  * DRBG interface functions
  *************************************************************************/
@@ -1287,14 +1324,12 @@ static int drbg_generate(struct drbg_state *drbg,
 			 struct drbg_string *addtl)
 {
 	int len = 0;
-	struct drbg_state *shadow = NULL;
 	LIST_HEAD(addtllist);
-	struct drbg_string timestamp;
-	union {
-		cycles_t cycles;
-		unsigned char char_cycles[sizeof(cycles_t)];
-	} now;
 
+	if (!drbg->core) {
+		pr_devel("DRBG: not yet seeded\n");
+		return -EINVAL;
+	}
 	if (0 == buflen || !buf) {
 		pr_devel("DRBG: no output buffer provided\n");
 		return -EINVAL;
@@ -1304,15 +1339,9 @@ static int drbg_generate(struct drbg_state *drbg,
 		return -EINVAL;
 	}
 
-	len = drbg_make_shadow(drbg, &shadow);
-	if (len) {
-		pr_devel("DRBG: shadow copy cannot be generated\n");
-		return len;
-	}
-
 	/* 9.3.1 step 2 */
 	len = -EINVAL;
-	if (buflen > (drbg_max_request_bytes(shadow))) {
+	if (buflen > (drbg_max_request_bytes(drbg))) {
 		pr_devel("DRBG: requested random numbers too large %u\n",
 			 buflen);
 		goto err;
@@ -1321,7 +1350,7 @@ static int drbg_generate(struct drbg_state *drbg,
 	/* 9.3.1 step 3 is implicit with the chosen DRBG */
 
 	/* 9.3.1 step 4 */
-	if (addtl && addtl->len > (drbg_max_addtl(shadow))) {
+	if (addtl && addtl->len > (drbg_max_addtl(drbg))) {
 		pr_devel("DRBG: additional information string too long %zu\n",
 			 addtl->len);
 		goto err;
@@ -1332,46 +1361,29 @@ static int drbg_generate(struct drbg_state *drbg,
 	 * 9.3.1 step 6 and 9 supplemented by 9.3.2 step c is implemented
 	 * here. The spec is a bit convoluted here, we make it simpler.
 	 */
-	if ((drbg_max_requests(shadow)) < shadow->reseed_ctr)
-		shadow->seeded = false;
-
-	/* allocate cipher handle */
-	len = shadow->d_ops->crypto_init(shadow);
-	if (len)
-		goto err;
+	if ((drbg_max_requests(drbg)) < drbg->reseed_ctr)
+		drbg->seeded = false;
 
-	if (shadow->pr || !shadow->seeded) {
+	if (drbg->pr || !drbg->seeded) {
 		pr_devel("DRBG: reseeding before generation (prediction "
 			 "resistance: %s, state %s)\n",
 			 drbg->pr ? "true" : "false",
 			 drbg->seeded ? "seeded" : "unseeded");
 		/* 9.3.1 steps 7.1 through 7.3 */
-		len = drbg_seed(shadow, addtl, true);
+		len = drbg_seed(drbg, addtl, true);
 		if (len)
 			goto err;
 		/* 9.3.1 step 7.4 */
 		addtl = NULL;
 	}
 
-	/*
-	 * Mix the time stamp into the DRBG state if the DRBG is not in
-	 * test mode. If there are two callers invoking the DRBG at the same
-	 * time, i.e. before the first caller merges its shadow state back,
-	 * both callers would obtain the same random number stream without
-	 * changing the state here.
-	 */
-	if (!drbg->test_data) {
-		now.cycles = random_get_entropy();
-		drbg_string_fill(&timestamp, now.char_cycles, sizeof(cycles_t));
-		list_add_tail(&timestamp.list, &addtllist);
-	}
 	if (addtl && 0 < addtl->len)
 		list_add_tail(&addtl->list, &addtllist);
 	/* 9.3.1 step 8 and 10 */
-	len = shadow->d_ops->generate(shadow, buf, buflen, &addtllist);
+	len = drbg->d_ops->generate(drbg, buf, buflen, &addtllist);
 
 	/* 10.1.1.4 step 6, 10.1.2.5 step 7, 10.2.1.5.2 step 7 */
-	shadow->reseed_ctr++;
+	drbg->reseed_ctr++;
 	if (0 >= len)
 		goto err;
 
@@ -1391,7 +1403,7 @@ static int drbg_generate(struct drbg_state *drbg,
 	 * case somebody has a need to implement the test of 11.3.3.
 	 */
 #if 0
-	if (shadow->reseed_ctr && !(shadow->reseed_ctr % 4096)) {
+	if (drbg->reseed_ctr && !(drbg->reseed_ctr % 4096)) {
 		int err = 0;
 		pr_devel("DRBG: start to perform self test\n");
 		if (drbg->core->flags & DRBG_HMAC)
@@ -1410,8 +1422,6 @@ static int drbg_generate(struct drbg_state *drbg,
 			 * are returned when reusing this DRBG cipher handle
 			 */
 			drbg_uninstantiate(drbg);
-			drbg_dealloc_state(shadow);
-			kzfree(shadow);
 			return 0;
 		} else {
 			pr_devel("DRBG: self test successful\n");
@@ -1425,8 +1435,6 @@ static int drbg_generate(struct drbg_state *drbg,
 	 */
 	len = 0;
 err:
-	shadow->d_ops->crypto_fini(shadow);
-	drbg_restore_shadow(drbg, &shadow);
 	return len;
 }
 
@@ -1442,19 +1450,21 @@ static int drbg_generate_long(struct drbg_state *drbg,
 			      unsigned char *buf, unsigned int buflen,
 			      struct drbg_string *addtl)
 {
-	int len = 0;
+	unsigned int len = 0;
 	unsigned int slice = 0;
 	do {
-		int tmplen = 0;
+		int err = 0;
 		unsigned int chunk = 0;
 		slice = ((buflen - len) / drbg_max_request_bytes(drbg));
 		chunk = slice ? drbg_max_request_bytes(drbg) : (buflen - len);
-		tmplen = drbg_generate(drbg, buf + len, chunk, addtl);
-		if (0 >= tmplen)
-			return tmplen;
-		len += tmplen;
+		mutex_lock(&drbg->drbg_mutex);
+		err = drbg_generate(drbg, buf + len, chunk, addtl);
+		mutex_unlock(&drbg->drbg_mutex);
+		if (0 > err)
+			return err;
+		len += chunk;
 	} while (slice > 0 && (len < buflen));
-	return len;
+	return 0;
 }
 
 /*
@@ -1477,32 +1487,12 @@ static int drbg_generate_long(struct drbg_state *drbg,
 static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
 			    int coreref, bool pr)
 {
-	int ret = -ENOMEM;
+	int ret;
+	bool reseed = true;
 
 	pr_devel("DRBG: Initializing DRBG core %d with prediction resistance "
 		 "%s\n", coreref, pr ? "enabled" : "disabled");
-	drbg->core = &drbg_cores[coreref];
-	drbg->pr = pr;
-	drbg->seeded = false;
-	switch (drbg->core->flags & DRBG_TYPE_MASK) {
-#ifdef CONFIG_CRYPTO_DRBG_HMAC
-	case DRBG_HMAC:
-		drbg->d_ops = &drbg_hmac_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_HMAC */
-#ifdef CONFIG_CRYPTO_DRBG_HASH
-	case DRBG_HASH:
-		drbg->d_ops = &drbg_hash_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_HASH */
-#ifdef CONFIG_CRYPTO_DRBG_CTR
-	case DRBG_CTR:
-		drbg->d_ops = &drbg_ctr_ops;
-		break;
-#endif /* CONFIG_CRYPTO_DRBG_CTR */
-	default:
-		return -EOPNOTSUPP;
-	}
+	mutex_lock(&drbg->drbg_mutex);
 
 	/* 9.1 step 1 is implicit with the selected DRBG type */
 
@@ -1514,22 +1504,36 @@ static int drbg_instantiate(struct drbg_state *drbg, struct drbg_string *pers,
 
 	/* 9.1 step 4 is implicit in  drbg_sec_strength */
 
-	ret = drbg_alloc_state(drbg);
-	if (ret)
-		return ret;
+	if (!drbg->core) {
+		drbg->core = &drbg_cores[coreref];
+		drbg->pr = pr;
+		drbg->seeded = false;
 
-	ret = -EFAULT;
-	if (drbg->d_ops->crypto_init(drbg))
-		goto err;
-	ret = drbg_seed(drbg, pers, false);
-	drbg->d_ops->crypto_fini(drbg);
-	if (ret)
+		ret = drbg_alloc_state(drbg);
+		if (ret)
+			goto unlock;
+
+		ret = -EFAULT;
+		if (drbg->d_ops->crypto_init(drbg))
+			goto err;
+
+		reseed = false;
+	}
+
+	ret = drbg_seed(drbg, pers, reseed);
+
+	if (ret && !reseed) {
+		drbg->d_ops->crypto_fini(drbg);
 		goto err;
+	}
 
-	return 0;
+	mutex_unlock(&drbg->drbg_mutex);
+	return ret;
 
 err:
 	drbg_dealloc_state(drbg);
+unlock:
+	mutex_unlock(&drbg->drbg_mutex);
 	return ret;
 }
 
@@ -1544,10 +1548,11 @@ err:
  */
 static int drbg_uninstantiate(struct drbg_state *drbg)
 {
-	spin_lock_bh(&drbg->drbg_lock);
+	cancel_work_sync(&drbg->seed_work);
+	if (drbg->d_ops)
+		drbg->d_ops->crypto_fini(drbg);
 	drbg_dealloc_state(drbg);
 	/* no scrubbing of test_data -- this shall survive an uninstantiate */
-	spin_unlock_bh(&drbg->drbg_lock);
 	return 0;
 }
 
@@ -1555,16 +1560,17 @@ static int drbg_uninstantiate(struct drbg_state *drbg)
  * Helper function for setting the test data in the DRBG
  *
  * @drbg DRBG state handle
- * @test_data test data to sets
+ * @data test data
+ * @len test data length
  */
-static inline void drbg_set_testdata(struct drbg_state *drbg,
-				     struct drbg_test_data *test_data)
+static void drbg_kcapi_set_entropy(struct crypto_rng *tfm,
+				   const u8 *data, unsigned int len)
 {
-	if (!test_data || !test_data->testentropy)
-		return;
-	spin_lock_bh(&drbg->drbg_lock);
-	drbg->test_data = test_data;
-	spin_unlock_bh(&drbg->drbg_lock);
+	struct drbg_state *drbg = crypto_rng_ctx(tfm);
+
+	mutex_lock(&drbg->drbg_mutex);
+	drbg_string_fill(&drbg->test_data, data, len);
+	mutex_unlock(&drbg->drbg_mutex);
 }
 
 /***************************************************************
@@ -1714,15 +1720,10 @@ static inline void drbg_convert_tfm_core(const char *cra_driver_name,
 static int drbg_kcapi_init(struct crypto_tfm *tfm)
 {
 	struct drbg_state *drbg = crypto_tfm_ctx(tfm);
-	bool pr = false;
-	int coreref = 0;
 
-	drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm), &coreref, &pr);
-	/*
-	 * when personalization string is needed, the caller must call reset
-	 * and provide the personalization string as seed information
-	 */
-	return drbg_instantiate(drbg, NULL, coreref, pr);
+	mutex_init(&drbg->drbg_mutex);
+
+	return 0;
 }
 
 static void drbg_kcapi_cleanup(struct crypto_tfm *tfm)
@@ -1734,65 +1735,49 @@ static void drbg_kcapi_cleanup(struct crypto_tfm *tfm)
  * Generate random numbers invoked by the kernel crypto API:
  * The API of the kernel crypto API is extended as follows:
  *
- * If dlen is larger than zero, rdata is interpreted as the output buffer
- * where random data is to be stored.
- *
- * If dlen is zero, rdata is interpreted as a pointer to a struct drbg_gen
- * which holds the additional information string that is used for the
- * DRBG generation process. The output buffer that is to be used to store
- * data is also pointed to by struct drbg_gen.
+ * src is additional input supplied to the RNG.
+ * slen is the length of src.
+ * dst is the output buffer where random data is to be stored.
+ * dlen is the length of dst.
  */
-static int drbg_kcapi_random(struct crypto_rng *tfm, u8 *rdata,
-			     unsigned int dlen)
+static int drbg_kcapi_random(struct crypto_rng *tfm,
+			     const u8 *src, unsigned int slen,
+			     u8 *dst, unsigned int dlen)
 {
 	struct drbg_state *drbg = crypto_rng_ctx(tfm);
-	if (0 < dlen) {
-		return drbg_generate_long(drbg, rdata, dlen, NULL);
-	} else {
-		struct drbg_gen *data = (struct drbg_gen *)rdata;
-		struct drbg_string addtl;
-		/* catch NULL pointer */
-		if (!data)
-			return 0;
-		drbg_set_testdata(drbg, data->test_data);
+	struct drbg_string *addtl = NULL;
+	struct drbg_string string;
+
+	if (slen) {
 		/* linked list variable is now local to allow modification */
-		drbg_string_fill(&addtl, data->addtl->buf, data->addtl->len);
-		return drbg_generate_long(drbg, data->outbuf, data->outlen,
-					  &addtl);
+		drbg_string_fill(&string, src, slen);
+		addtl = &string;
 	}
+
+	return drbg_generate_long(drbg, dst, dlen, addtl);
 }
 
 /*
- * Reset the DRBG invoked by the kernel crypto API
- * The reset implies a full re-initialization of the DRBG. Similar to the
- * generate function of drbg_kcapi_random, this function extends the
- * kernel crypto API interface with struct drbg_gen
+ * Seed the DRBG invoked by the kernel crypto API
  */
-static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int drbg_kcapi_seed(struct crypto_rng *tfm,
+			   const u8 *seed, unsigned int slen)
 {
 	struct drbg_state *drbg = crypto_rng_ctx(tfm);
 	struct crypto_tfm *tfm_base = crypto_rng_tfm(tfm);
 	bool pr = false;
-	struct drbg_string seed_string;
+	struct drbg_string string;
+	struct drbg_string *seed_string = NULL;
 	int coreref = 0;
 
-	drbg_uninstantiate(drbg);
 	drbg_convert_tfm_core(crypto_tfm_alg_driver_name(tfm_base), &coreref,
 			      &pr);
 	if (0 < slen) {
-		drbg_string_fill(&seed_string, seed, slen);
-		return drbg_instantiate(drbg, &seed_string, coreref, pr);
-	} else {
-		struct drbg_gen *data = (struct drbg_gen *)seed;
-		/* allow invocation of API call with NULL, 0 */
-		if (!data)
-			return drbg_instantiate(drbg, NULL, coreref, pr);
-		drbg_set_testdata(drbg, data->test_data);
-		/* linked list variable is now local to allow modification */
-		drbg_string_fill(&seed_string, data->addtl->buf,
-				 data->addtl->len);
-		return drbg_instantiate(drbg, &seed_string, coreref, pr);
+		drbg_string_fill(&string, seed, slen);
+		seed_string = &string;
 	}
+
+	return drbg_instantiate(drbg, seed_string, coreref, pr);
 }
 
 /***************************************************************
@@ -1811,7 +1796,6 @@ static int drbg_kcapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
  */
 static inline int __init drbg_healthcheck_sanity(void)
 {
-#ifdef CONFIG_CRYPTO_FIPS
 	int len = 0;
 #define OUTBUFLEN 16
 	unsigned char buf[OUTBUFLEN];
@@ -1839,6 +1823,8 @@ static inline int __init drbg_healthcheck_sanity(void)
 	if (!drbg)
 		return -ENOMEM;
 
+	mutex_init(&drbg->drbg_mutex);
+
 	/*
 	 * if the following tests fail, it is likely that there is a buffer
 	 * overflow as buf is much smaller than the requested or provided
@@ -1877,37 +1863,33 @@ static inline int __init drbg_healthcheck_sanity(void)
 outbuf:
 	kzfree(drbg);
 	return rc;
-#else /* CONFIG_CRYPTO_FIPS */
-	return 0;
-#endif /* CONFIG_CRYPTO_FIPS */
 }
 
-static struct crypto_alg drbg_algs[22];
+static struct rng_alg drbg_algs[22];
 
 /*
  * Fill the array drbg_algs used to register the different DRBGs
  * with the kernel crypto API. To fill the array, the information
  * from drbg_cores[] is used.
  */
-static inline void __init drbg_fill_array(struct crypto_alg *alg,
+static inline void __init drbg_fill_array(struct rng_alg *alg,
 					  const struct drbg_core *core, int pr)
 {
 	int pos = 0;
 	static int priority = 100;
 
-	memset(alg, 0, sizeof(struct crypto_alg));
-	memcpy(alg->cra_name, "stdrng", 6);
+	memcpy(alg->base.cra_name, "stdrng", 6);
 	if (pr) {
-		memcpy(alg->cra_driver_name, "drbg_pr_", 8);
+		memcpy(alg->base.cra_driver_name, "drbg_pr_", 8);
 		pos = 8;
 	} else {
-		memcpy(alg->cra_driver_name, "drbg_nopr_", 10);
+		memcpy(alg->base.cra_driver_name, "drbg_nopr_", 10);
 		pos = 10;
 	}
-	memcpy(alg->cra_driver_name + pos, core->cra_name,
+	memcpy(alg->base.cra_driver_name + pos, core->cra_name,
 	       strlen(core->cra_name));
 
-	alg->cra_priority = priority;
+	alg->base.cra_priority = priority;
 	priority++;
 	/*
 	 * If FIPS mode enabled, the selected DRBG shall have the
@@ -1915,17 +1897,16 @@ static inline void __init drbg_fill_array(struct crypto_alg *alg,
 	 * it is selected.
 	 */
 	if (fips_enabled)
-		alg->cra_priority += 200;
-
-	alg->cra_flags		= CRYPTO_ALG_TYPE_RNG;
-	alg->cra_ctxsize 	= sizeof(struct drbg_state);
-	alg->cra_type		= &crypto_rng_type;
-	alg->cra_module		= THIS_MODULE;
-	alg->cra_init		= drbg_kcapi_init;
-	alg->cra_exit		= drbg_kcapi_cleanup;
-	alg->cra_u.rng.rng_make_random	= drbg_kcapi_random;
-	alg->cra_u.rng.rng_reset	= drbg_kcapi_reset;
-	alg->cra_u.rng.seedsize	= 0;
+		alg->base.cra_priority += 200;
+
+	alg->base.cra_ctxsize 	= sizeof(struct drbg_state);
+	alg->base.cra_module	= THIS_MODULE;
+	alg->base.cra_init	= drbg_kcapi_init;
+	alg->base.cra_exit	= drbg_kcapi_cleanup;
+	alg->generate		= drbg_kcapi_random;
+	alg->seed		= drbg_kcapi_seed;
+	alg->set_ent		= drbg_kcapi_set_entropy;
+	alg->seedsize		= 0;
 }
 
 static int __init drbg_init(void)
@@ -1958,12 +1939,12 @@ static int __init drbg_init(void)
 		drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 1);
 	for (j = 0; ARRAY_SIZE(drbg_cores) > j; j++, i++)
 		drbg_fill_array(&drbg_algs[i], &drbg_cores[j], 0);
-	return crypto_register_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
+	return crypto_register_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
 }
 
 static void __exit drbg_exit(void)
 {
-	crypto_unregister_algs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
+	crypto_unregister_rngs(drbg_algs, (ARRAY_SIZE(drbg_cores) * 2));
 }
 
 module_init(drbg_init);
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
new file mode 100644
index 0000000..bd85dcc
--- /dev/null
+++ b/crypto/echainiv.c
@@ -0,0 +1,546 @@
+/*
+ * echainiv: Encrypted Chain IV Generator
+ *
+ * This generator generates an IV based on a sequence number by xoring it
+ * with a salt and then encrypting it with the same key as used to encrypt
+ * the plain text.  This algorithm requires that the block size be equal
+ * to the IV size.  It is mainly useful for CBC.
+ *
+ * This generator can only be used by algorithms where authentication
+ * is performed after encryption (i.e., authenc).
+ *
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/aead.h>
+#include <crypto/null.h>
+#include <crypto/rng.h>
+#include <crypto/scatterwalk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#define MAX_IV_SIZE 16
+
+struct echainiv_request_ctx {
+	struct scatterlist src[2];
+	struct scatterlist dst[2];
+	struct scatterlist ivbuf[2];
+	struct scatterlist *ivsg;
+	struct aead_givcrypt_request subreq;
+};
+
+struct echainiv_ctx {
+	struct crypto_aead *child;
+	spinlock_t lock;
+	struct crypto_blkcipher *null;
+	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
+};
+
+static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
+
+static int echainiv_setkey(struct crypto_aead *tfm,
+			      const u8 *key, unsigned int keylen)
+{
+	struct echainiv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setkey(ctx->child, key, keylen);
+}
+
+static int echainiv_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	struct echainiv_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setauthsize(ctx->child, authsize);
+}
+
+/* We don't care if we get preempted and read/write IVs from the next CPU. */
+static void echainiv_read_iv(u8 *dst, unsigned size)
+{
+	u32 *a = (u32 *)dst;
+	u32 __percpu *b = echainiv_iv;
+
+	for (; size >= 4; size -= 4) {
+		*a++ = this_cpu_read(*b);
+		b++;
+	}
+}
+
+static void echainiv_write_iv(const u8 *src, unsigned size)
+{
+	const u32 *a = (const u32 *)src;
+	u32 __percpu *b = echainiv_iv;
+
+	for (; size >= 4; size -= 4) {
+		this_cpu_write(*b, *a);
+		a++;
+		b++;
+	}
+}
+
+static void echainiv_encrypt_compat_complete2(struct aead_request *req,
+						 int err)
+{
+	struct echainiv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_givcrypt_request *subreq = &rctx->subreq;
+	struct crypto_aead *geniv;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	scatterwalk_map_and_copy(subreq->giv, rctx->ivsg, 0,
+				 crypto_aead_ivsize(geniv), 1);
+
+out:
+	kzfree(subreq->giv);
+}
+
+static void echainiv_encrypt_compat_complete(
+	struct crypto_async_request *base, int err)
+{
+	struct aead_request *req = base->data;
+
+	echainiv_encrypt_compat_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static void echainiv_encrypt_complete2(struct aead_request *req, int err)
+{
+	struct aead_request *subreq = aead_request_ctx(req);
+	struct crypto_aead *geniv;
+	unsigned int ivsize;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	ivsize = crypto_aead_ivsize(geniv);
+
+	echainiv_write_iv(subreq->iv, ivsize);
+
+	if (req->iv != subreq->iv)
+		memcpy(req->iv, subreq->iv, ivsize);
+
+out:
+	if (req->iv != subreq->iv)
+		kzfree(subreq->iv);
+}
+
+static void echainiv_encrypt_complete(struct crypto_async_request *base,
+					 int err)
+{
+	struct aead_request *req = base->data;
+
+	echainiv_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static int echainiv_encrypt_compat(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct echainiv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_givcrypt_request *subreq = &rctx->subreq;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	__be64 seq;
+	int err;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	rctx->ivsg = scatterwalk_ffwd(rctx->ivbuf, req->dst, req->assoclen);
+	info = PageHighMem(sg_page(rctx->ivsg)) ? NULL : sg_virt(rctx->ivsg);
+
+	if (!info) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		compl = echainiv_encrypt_compat_complete;
+		data = req;
+	}
+
+	memcpy(&seq, req->iv + ivsize - sizeof(seq), sizeof(seq));
+
+	aead_givcrypt_set_tfm(subreq, ctx->child);
+	aead_givcrypt_set_callback(subreq, req->base.flags,
+				   req->base.complete, req->base.data);
+	aead_givcrypt_set_crypt(subreq,
+				scatterwalk_ffwd(rctx->src, req->src,
+						 req->assoclen + ivsize),
+				scatterwalk_ffwd(rctx->dst, rctx->ivsg,
+						 ivsize),
+				req->cryptlen - ivsize, req->iv);
+	aead_givcrypt_set_assoc(subreq, req->src, req->assoclen);
+	aead_givcrypt_set_giv(subreq, info, be64_to_cpu(seq));
+
+	err = crypto_aead_givencrypt(subreq);
+	if (unlikely(PageHighMem(sg_page(rctx->ivsg))))
+		echainiv_encrypt_compat_complete2(req, err);
+	return err;
+}
+
+static int echainiv_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+	int err;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = echainiv_encrypt_complete;
+	data = req;
+	info = req->iv;
+
+	if (req->src != req->dst) {
+		struct scatterlist src[2];
+		struct scatterlist dst[2];
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(
+			&desc,
+			scatterwalk_ffwd(dst, req->dst,
+					 req->assoclen + ivsize),
+			scatterwalk_ffwd(src, req->src,
+					 req->assoclen + ivsize),
+			req->cryptlen - ivsize);
+		if (err)
+			return err;
+	}
+
+	if (unlikely(!IS_ALIGNED((unsigned long)info,
+				 crypto_aead_alignmask(geniv) + 1))) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		memcpy(info, req->iv, ivsize);
+	}
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->dst, req->dst,
+			       req->cryptlen - ivsize, info);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	crypto_xor(info, ctx->salt, ivsize);
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+	echainiv_read_iv(info, ivsize);
+
+	err = crypto_aead_encrypt(subreq);
+	echainiv_encrypt_complete2(req, err);
+	return err;
+}
+
+static int echainiv_decrypt_compat(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct echainiv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq.areq;
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq,
+			       scatterwalk_ffwd(rctx->src, req->src,
+						req->assoclen + ivsize),
+			       scatterwalk_ffwd(rctx->dst, req->dst,
+						req->assoclen + ivsize),
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_assoc(subreq, req->src, req->assoclen);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+
+	return crypto_aead_decrypt(subreq);
+}
+
+static int echainiv_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = crypto_aead_ivsize(geniv);
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+	if (req->src != req->dst)
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->assoclen, ivsize, 1);
+
+	return crypto_aead_decrypt(subreq);
+}
+
+static int echainiv_encrypt_compat_first(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err = 0;
+
+	spin_lock_bh(&ctx->lock);
+	if (geniv->encrypt != echainiv_encrypt_compat_first)
+		goto unlock;
+
+	geniv->encrypt = echainiv_encrypt_compat;
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+
+unlock:
+	spin_unlock_bh(&ctx->lock);
+
+	if (err)
+		return err;
+
+	return echainiv_encrypt_compat(req);
+}
+
+static int echainiv_encrypt_first(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err = 0;
+
+	spin_lock_bh(&ctx->lock);
+	if (geniv->encrypt != echainiv_encrypt_first)
+		goto unlock;
+
+	geniv->encrypt = echainiv_encrypt;
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+
+unlock:
+	spin_unlock_bh(&ctx->lock);
+
+	if (err)
+		return err;
+
+	return echainiv_encrypt(req);
+}
+
+static int echainiv_compat_init(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
+
+	spin_lock_init(&ctx->lock);
+
+	crypto_aead_set_reqsize(geniv, sizeof(struct echainiv_request_ctx));
+
+	err = aead_geniv_init(tfm);
+
+	ctx->child = geniv->child;
+	geniv->child = geniv;
+
+	return err;
+}
+
+static int echainiv_init(struct crypto_tfm *tfm)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
+
+	spin_lock_init(&ctx->lock);
+
+	crypto_aead_set_reqsize(geniv, sizeof(struct aead_request));
+
+	ctx->null = crypto_get_default_null_skcipher();
+	err = PTR_ERR(ctx->null);
+	if (IS_ERR(ctx->null))
+		goto out;
+
+	err = aead_geniv_init(tfm);
+	if (err)
+		goto drop_null;
+
+	ctx->child = geniv->child;
+	geniv->child = geniv;
+
+out:
+	return err;
+
+drop_null:
+	crypto_put_default_null_skcipher();
+	goto out;
+}
+
+static void echainiv_compat_exit(struct crypto_tfm *tfm)
+{
+	struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->child);
+}
+
+static void echainiv_exit(struct crypto_tfm *tfm)
+{
+	struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->child);
+	crypto_put_default_null_skcipher();
+}
+
+static int echainiv_aead_create(struct crypto_template *tmpl,
+				struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+
+	if (IS_ERR(inst))
+		return PTR_ERR(inst);
+
+	err = -EINVAL;
+	if (inst->alg.ivsize < sizeof(u64) ||
+	    inst->alg.ivsize & (sizeof(u32) - 1) ||
+	    inst->alg.ivsize > MAX_IV_SIZE)
+		goto free_inst;
+
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	inst->alg.setkey = echainiv_setkey;
+	inst->alg.setauthsize = echainiv_setauthsize;
+	inst->alg.encrypt = echainiv_encrypt_first;
+	inst->alg.decrypt = echainiv_decrypt;
+
+	inst->alg.base.cra_init = echainiv_init;
+	inst->alg.base.cra_exit = echainiv_exit;
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_ctxsize = sizeof(struct echainiv_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize;
+
+	if (alg->base.cra_aead.encrypt) {
+		inst->alg.encrypt = echainiv_encrypt_compat_first;
+		inst->alg.decrypt = echainiv_decrypt_compat;
+
+		inst->alg.base.cra_init = echainiv_compat_init;
+		inst->alg.base.cra_exit = echainiv_compat_exit;
+	}
+
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
+
+out:
+	return err;
+
+free_inst:
+	aead_geniv_free(inst);
+	goto out;
+}
+
+static int echainiv_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	int err;
+
+	err = crypto_get_default_rng();
+	if (err)
+		goto out;
+
+	err = echainiv_aead_create(tmpl, tb);
+	if (err)
+		goto put_rng;
+
+out:
+	return err;
+
+put_rng:
+	crypto_put_default_rng();
+	goto out;
+}
+
+static void echainiv_free(struct crypto_instance *inst)
+{
+	aead_geniv_free(aead_instance(inst));
+	crypto_put_default_rng();
+}
+
+static struct crypto_template echainiv_tmpl = {
+	.name = "echainiv",
+	.create = echainiv_create,
+	.free = echainiv_free,
+	.module = THIS_MODULE,
+};
+
+static int __init echainiv_module_init(void)
+{
+	return crypto_register_template(&echainiv_tmpl);
+}
+
+static void __exit echainiv_module_exit(void)
+{
+	crypto_unregister_template(&echainiv_tmpl);
+}
+
+module_init(echainiv_module_init);
+module_exit(echainiv_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Encrypted Chain IV Generator");
+MODULE_ALIAS_CRYPTO("echainiv");
diff --git a/crypto/fips.c b/crypto/fips.c
index 5539700..9d627c1 100644
--- a/crypto/fips.c
+++ b/crypto/fips.c
@@ -10,7 +10,12 @@
  *
  */
 
-#include "internal.h"
+#include <linux/export.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sysctl.h>
 
 int fips_enabled;
 EXPORT_SYMBOL_GPL(fips_enabled);
@@ -25,3 +30,49 @@ static int fips_enable(char *str)
 }
 
 __setup("fips=", fips_enable);
+
+static struct ctl_table crypto_sysctl_table[] = {
+	{
+		.procname       = "fips_enabled",
+		.data           = &fips_enabled,
+		.maxlen         = sizeof(int),
+		.mode           = 0444,
+		.proc_handler   = proc_dointvec
+	},
+	{}
+};
+
+static struct ctl_table crypto_dir_table[] = {
+	{
+		.procname       = "crypto",
+		.mode           = 0555,
+		.child          = crypto_sysctl_table
+	},
+	{}
+};
+
+static struct ctl_table_header *crypto_sysctls;
+
+static void crypto_proc_fips_init(void)
+{
+	crypto_sysctls = register_sysctl_table(crypto_dir_table);
+}
+
+static void crypto_proc_fips_exit(void)
+{
+	unregister_sysctl_table(crypto_sysctls);
+}
+
+static int __init fips_init(void)
+{
+	crypto_proc_fips_init();
+	return 0;
+}
+
+static void __exit fips_exit(void)
+{
+	crypto_proc_fips_exit();
+}
+
+module_init(fips_init);
+module_exit(fips_exit);
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 2e403f6..fc2b55e 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -12,6 +12,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/hash.h>
+#include <crypto/null.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/hash.h>
 #include "internal.h"
@@ -39,7 +40,6 @@ struct crypto_rfc4106_ctx {
 
 struct crypto_rfc4543_instance_ctx {
 	struct crypto_aead_spawn aead;
-	struct crypto_skcipher_spawn null;
 };
 
 struct crypto_rfc4543_ctx {
@@ -672,12 +672,12 @@ static int crypto_gcm_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_tfm_alg_alignmask(tfm);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = align +
-		offsetof(struct crypto_gcm_req_priv_ctx, u) +
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		align + offsetof(struct crypto_gcm_req_priv_ctx, u) +
 		max(sizeof(struct ablkcipher_request) +
 		    crypto_ablkcipher_reqsize(ctr),
 		    sizeof(struct ahash_request) +
-		    crypto_ahash_reqsize(ghash));
+		    crypto_ahash_reqsize(ghash)));
 
 	return 0;
 
@@ -946,10 +946,10 @@ static int crypto_rfc4106_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct aead_request) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct aead_request) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 16);
 
 	return 0;
 }
@@ -1246,7 +1246,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	null = crypto_spawn_blkcipher(&ictx->null.base);
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_aead;
@@ -1256,10 +1256,10 @@ static int crypto_rfc4543_init_tfm(struct crypto_tfm *tfm)
 
 	align = crypto_aead_alignmask(aead);
 	align &= ~(crypto_tfm_ctx_alignment() - 1);
-	tfm->crt_aead.reqsize = sizeof(struct crypto_rfc4543_req_ctx) +
-				ALIGN(crypto_aead_reqsize(aead),
-				      crypto_tfm_ctx_alignment()) +
-				align + 16;
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct crypto_rfc4543_req_ctx) +
+		ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
+		align + 16);
 
 	return 0;
 
@@ -1273,7 +1273,7 @@ static void crypto_rfc4543_exit_tfm(struct crypto_tfm *tfm)
 	struct crypto_rfc4543_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_free_blkcipher(ctx->null);
+	crypto_put_default_null_skcipher();
 }
 
 static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
@@ -1311,23 +1311,15 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
 
 	alg = crypto_aead_spawn_alg(spawn);
 
-	crypto_set_skcipher_spawn(&ctx->null, inst);
-	err = crypto_grab_skcipher(&ctx->null, "ecb(cipher_null)", 0,
-				   CRYPTO_ALG_ASYNC);
-	if (err)
-		goto out_drop_alg;
-
-	crypto_skcipher_spawn_alg(&ctx->null);
-
 	err = -EINVAL;
 
 	/* We only support 16-byte blocks. */
 	if (alg->cra_aead.ivsize != 16)
-		goto out_drop_ecbnull;
+		goto out_drop_alg;
 
 	/* Not a stream cipher? */
 	if (alg->cra_blocksize != 1)
-		goto out_drop_ecbnull;
+		goto out_drop_alg;
 
 	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
@@ -1335,7 +1327,7 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
 	    snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "rfc4543(%s)", alg->cra_driver_name) >=
 	    CRYPTO_MAX_ALG_NAME)
-		goto out_drop_ecbnull;
+		goto out_drop_alg;
 
 	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
 	inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
@@ -1362,8 +1354,6 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
 out:
 	return inst;
 
-out_drop_ecbnull:
-	crypto_drop_skcipher(&ctx->null);
 out_drop_alg:
 	crypto_drop_aead(spawn);
 out_free_inst:
@@ -1377,7 +1367,6 @@ static void crypto_rfc4543_free(struct crypto_instance *inst)
 	struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst);
 
 	crypto_drop_aead(&ctx->aead);
-	crypto_drop_skcipher(&ctx->null);
 
 	kfree(inst);
 }
diff --git a/crypto/internal.h b/crypto/internal.h
index bd39bfc..00e42a3 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -25,7 +25,6 @@
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
-#include <linux/fips.h>
 
 /* Crypto notification events. */
 enum {
@@ -103,6 +102,8 @@ int crypto_register_notifier(struct notifier_block *nb);
 int crypto_unregister_notifier(struct notifier_block *nb);
 int crypto_probing_notify(unsigned long val, void *v);
 
+unsigned int crypto_alg_extsize(struct crypto_alg *alg);
+
 static inline struct crypto_alg *crypto_alg_get(struct crypto_alg *alg)
 {
 	atomic_inc(&alg->cra_refcnt);
diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c
new file mode 100644
index 0000000..1ebe58a
--- /dev/null
+++ b/crypto/jitterentropy.c
@@ -0,0 +1,909 @@
+/*
+ * Non-physical true random number generator based on timing jitter.
+ *
+ * Copyright Stephan Mueller <smueller@chronox.de>, 2014
+ *
+ * Design
+ * ======
+ *
+ * See http://www.chronox.de/jent.html
+ *
+ * License
+ * =======
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, and the entire permission notice in its entirety,
+ *    including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior
+ *    written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL2 are
+ * required INSTEAD OF the above restrictions.  (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+/*
+ * This Jitterentropy RNG is based on the jitterentropy library
+ * version 1.1.0 provided at http://www.chronox.de/jent.html
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/fips.h>
+#include <linux/time.h>
+#include <linux/crypto.h>
+#include <crypto/internal/rng.h>
+
+#ifdef __OPTIMIZE__
+ #error "The CPU Jitter random number generator must not be compiled with optimizations. See documentation. Use the compiler switch -O0 for compiling jitterentropy.c."
+#endif
+
+/* The entropy pool */
+struct rand_data {
+	/* all data values that are vital to maintain the security
+	 * of the RNG are marked as SENSITIVE. A user must not
+	 * access that information while the RNG executes its loops to
+	 * calculate the next random value. */
+	__u64 data;		/* SENSITIVE Actual random number */
+	__u64 old_data;		/* SENSITIVE Previous random number */
+	__u64 prev_time;	/* SENSITIVE Previous time stamp */
+#define DATA_SIZE_BITS ((sizeof(__u64)) * 8)
+	__u64 last_delta;	/* SENSITIVE stuck test */
+	__s64 last_delta2;	/* SENSITIVE stuck test */
+	unsigned int stuck:1;	/* Time measurement stuck */
+	unsigned int osr;	/* Oversample rate */
+	unsigned int stir:1;		/* Post-processing stirring */
+	unsigned int disable_unbias:1;	/* Deactivate Von-Neuman unbias */
+#define JENT_MEMORY_BLOCKS 64
+#define JENT_MEMORY_BLOCKSIZE 32
+#define JENT_MEMORY_ACCESSLOOPS 128
+#define JENT_MEMORY_SIZE (JENT_MEMORY_BLOCKS*JENT_MEMORY_BLOCKSIZE)
+	unsigned char *mem;	/* Memory access location with size of
+				 * memblocks * memblocksize */
+	unsigned int memlocation; /* Pointer to byte in *mem */
+	unsigned int memblocks;	/* Number of memory blocks in *mem */
+	unsigned int memblocksize; /* Size of one memory block in bytes */
+	unsigned int memaccessloops; /* Number of memory accesses per random
+				      * bit generation */
+};
+
+/* Flags that can be used to initialize the RNG */
+#define JENT_DISABLE_STIR (1<<0) /* Disable stirring the entropy pool */
+#define JENT_DISABLE_UNBIAS (1<<1) /* Disable the Von-Neuman Unbiaser */
+#define JENT_DISABLE_MEMORY_ACCESS (1<<2) /* Disable memory access for more
+					   * entropy, saves MEMORY_SIZE RAM for
+					   * entropy collector */
+
+#define DRIVER_NAME     "jitterentropy"
+
+/* -- error codes for init function -- */
+#define JENT_ENOTIME		1 /* Timer service not available */
+#define JENT_ECOARSETIME	2 /* Timer too coarse for RNG */
+#define JENT_ENOMONOTONIC	3 /* Timer is not monotonic increasing */
+#define JENT_EMINVARIATION	4 /* Timer variations too small for RNG */
+#define JENT_EVARVAR		5 /* Timer does not produce variations of
+				   * variations (2nd derivation of time is
+				   * zero). */
+#define JENT_EMINVARVAR		6 /* Timer variations of variations is tooi
+				   * small. */
+
+/***************************************************************************
+ * Helper functions
+ ***************************************************************************/
+
+static inline void jent_get_nstime(__u64 *out)
+{
+	struct timespec ts;
+	__u64 tmp = 0;
+
+	tmp = random_get_entropy();
+
+	/*
+	 * If random_get_entropy does not return a value (which is possible on,
+	 * for example, MIPS), invoke __getnstimeofday
+	 * hoping that there are timers we can work with.
+	 *
+	 * The list of available timers can be obtained from
+	 * /sys/devices/system/clocksource/clocksource0/available_clocksource
+	 * and are registered with clocksource_register()
+	 */
+	if ((0 == tmp) &&
+#ifndef MODULE
+	   (0 == timekeeping_valid_for_hres()) &&
+#endif
+	   (0 == __getnstimeofday(&ts))) {
+		tmp = ts.tv_sec;
+		tmp = tmp << 32;
+		tmp = tmp | ts.tv_nsec;
+	}
+
+	*out = tmp;
+}
+
+
+/**
+ * Update of the loop count used for the next round of
+ * an entropy collection.
+ *
+ * Input:
+ * @ec entropy collector struct -- may be NULL
+ * @bits is the number of low bits of the timer to consider
+ * @min is the number of bits we shift the timer value to the right at
+ *	the end to make sure we have a guaranteed minimum value
+ *
+ * @return Newly calculated loop counter
+ */
+static __u64 jent_loop_shuffle(struct rand_data *ec,
+			       unsigned int bits, unsigned int min)
+{
+	__u64 time = 0;
+	__u64 shuffle = 0;
+	unsigned int i = 0;
+	unsigned int mask = (1<<bits) - 1;
+
+	jent_get_nstime(&time);
+	/*
+	 * mix the current state of the random number into the shuffle
+	 * calculation to balance that shuffle a bit more
+	 */
+	if (ec)
+		time ^= ec->data;
+	/*
+	 * we fold the time value as much as possible to ensure that as many
+	 * bits of the time stamp are included as possible
+	 */
+	for (i = 0; (DATA_SIZE_BITS / bits) > i; i++) {
+		shuffle ^= time & mask;
+		time = time >> bits;
+	}
+
+	/*
+	 * We add a lower boundary value to ensure we have a minimum
+	 * RNG loop count.
+	 */
+	return (shuffle + (1<<min));
+}
+
+/***************************************************************************
+ * Noise sources
+ ***************************************************************************/
+
+/**
+ * CPU Jitter noise source -- this is the noise source based on the CPU
+ *			      execution time jitter
+ *
+ * This function folds the time into one bit units by iterating
+ * through the DATA_SIZE_BITS bit time value as follows: assume our time value
+ * is 0xabcd
+ * 1st loop, 1st shift generates 0xd000
+ * 1st loop, 2nd shift generates 0x000d
+ * 2nd loop, 1st shift generates 0xcd00
+ * 2nd loop, 2nd shift generates 0x000c
+ * 3rd loop, 1st shift generates 0xbcd0
+ * 3rd loop, 2nd shift generates 0x000b
+ * 4th loop, 1st shift generates 0xabcd
+ * 4th loop, 2nd shift generates 0x000a
+ * Now, the values at the end of the 2nd shifts are XORed together.
+ *
+ * The code is deliberately inefficient and shall stay that way. This function
+ * is the root cause why the code shall be compiled without optimization. This
+ * function not only acts as folding operation, but this function's execution
+ * is used to measure the CPU execution time jitter. Any change to the loop in
+ * this function implies that careful retesting must be done.
+ *
+ * Input:
+ * @ec entropy collector struct -- may be NULL
+ * @time time stamp to be folded
+ * @loop_cnt if a value not equal to 0 is set, use the given value as number of
+ *	     loops to perform the folding
+ *
+ * Output:
+ * @folded result of folding operation
+ *
+ * @return Number of loops the folding operation is performed
+ */
+static __u64 jent_fold_time(struct rand_data *ec, __u64 time,
+			    __u64 *folded, __u64 loop_cnt)
+{
+	unsigned int i;
+	__u64 j = 0;
+	__u64 new = 0;
+#define MAX_FOLD_LOOP_BIT 4
+#define MIN_FOLD_LOOP_BIT 0
+	__u64 fold_loop_cnt =
+		jent_loop_shuffle(ec, MAX_FOLD_LOOP_BIT, MIN_FOLD_LOOP_BIT);
+
+	/*
+	 * testing purposes -- allow test app to set the counter, not
+	 * needed during runtime
+	 */
+	if (loop_cnt)
+		fold_loop_cnt = loop_cnt;
+	for (j = 0; j < fold_loop_cnt; j++) {
+		new = 0;
+		for (i = 1; (DATA_SIZE_BITS) >= i; i++) {
+			__u64 tmp = time << (DATA_SIZE_BITS - i);
+
+			tmp = tmp >> (DATA_SIZE_BITS - 1);
+			new ^= tmp;
+		}
+	}
+	*folded = new;
+	return fold_loop_cnt;
+}
+
+/**
+ * Memory Access noise source -- this is a noise source based on variations in
+ *				 memory access times
+ *
+ * This function performs memory accesses which will add to the timing
+ * variations due to an unknown amount of CPU wait states that need to be
+ * added when accessing memory. The memory size should be larger than the L1
+ * caches as outlined in the documentation and the associated testing.
+ *
+ * The L1 cache has a very high bandwidth, albeit its access rate is  usually
+ * slower than accessing CPU registers. Therefore, L1 accesses only add minimal
+ * variations as the CPU has hardly to wait. Starting with L2, significant
+ * variations are added because L2 typically does not belong to the CPU any more
+ * and therefore a wider range of CPU wait states is necessary for accesses.
+ * L3 and real memory accesses have even a wider range of wait states. However,
+ * to reliably access either L3 or memory, the ec->mem memory must be quite
+ * large which is usually not desirable.
+ *
+ * Input:
+ * @ec Reference to the entropy collector with the memory access data -- if
+ *     the reference to the memory block to be accessed is NULL, this noise
+ *     source is disabled
+ * @loop_cnt if a value not equal to 0 is set, use the given value as number of
+ *	     loops to perform the folding
+ *
+ * @return Number of memory access operations
+ */
+static unsigned int jent_memaccess(struct rand_data *ec, __u64 loop_cnt)
+{
+	unsigned char *tmpval = NULL;
+	unsigned int wrap = 0;
+	__u64 i = 0;
+#define MAX_ACC_LOOP_BIT 7
+#define MIN_ACC_LOOP_BIT 0
+	__u64 acc_loop_cnt =
+		jent_loop_shuffle(ec, MAX_ACC_LOOP_BIT, MIN_ACC_LOOP_BIT);
+
+	if (NULL == ec || NULL == ec->mem)
+		return 0;
+	wrap = ec->memblocksize * ec->memblocks;
+
+	/*
+	 * testing purposes -- allow test app to set the counter, not
+	 * needed during runtime
+	 */
+	if (loop_cnt)
+		acc_loop_cnt = loop_cnt;
+
+	for (i = 0; i < (ec->memaccessloops + acc_loop_cnt); i++) {
+		tmpval = ec->mem + ec->memlocation;
+		/*
+		 * memory access: just add 1 to one byte,
+		 * wrap at 255 -- memory access implies read
+		 * from and write to memory location
+		 */
+		*tmpval = (*tmpval + 1) & 0xff;
+		/*
+		 * Addition of memblocksize - 1 to pointer
+		 * with wrap around logic to ensure that every
+		 * memory location is hit evenly
+		 */
+		ec->memlocation = ec->memlocation + ec->memblocksize - 1;
+		ec->memlocation = ec->memlocation % wrap;
+	}
+	return i;
+}
+
+/***************************************************************************
+ * Start of entropy processing logic
+ ***************************************************************************/
+
+/**
+ * Stuck test by checking the:
+ *	1st derivation of the jitter measurement (time delta)
+ *	2nd derivation of the jitter measurement (delta of time deltas)
+ *	3rd derivation of the jitter measurement (delta of delta of time deltas)
+ *
+ * All values must always be non-zero.
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ * @current_delta Jitter time delta
+ *
+ * @return
+ *	0 jitter measurement not stuck (good bit)
+ *	1 jitter measurement stuck (reject bit)
+ */
+static void jent_stuck(struct rand_data *ec, __u64 current_delta)
+{
+	__s64 delta2 = ec->last_delta - current_delta;
+	__s64 delta3 = delta2 - ec->last_delta2;
+
+	ec->last_delta = current_delta;
+	ec->last_delta2 = delta2;
+
+	if (!current_delta || !delta2 || !delta3)
+		ec->stuck = 1;
+}
+
+/**
+ * This is the heart of the entropy generation: calculate time deltas and
+ * use the CPU jitter in the time deltas. The jitter is folded into one
+ * bit. You can call this function the "random bit generator" as it
+ * produces one random bit per invocation.
+ *
+ * WARNING: ensure that ->prev_time is primed before using the output
+ *	    of this function! This can be done by calling this function
+ *	    and not using its result.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ *
+ * @return One random bit
+ */
+static __u64 jent_measure_jitter(struct rand_data *ec)
+{
+	__u64 time = 0;
+	__u64 data = 0;
+	__u64 current_delta = 0;
+
+	/* Invoke one noise source before time measurement to add variations */
+	jent_memaccess(ec, 0);
+
+	/*
+	 * Get time stamp and calculate time delta to previous
+	 * invocation to measure the timing variations
+	 */
+	jent_get_nstime(&time);
+	current_delta = time - ec->prev_time;
+	ec->prev_time = time;
+
+	/* Now call the next noise sources which also folds the data */
+	jent_fold_time(ec, current_delta, &data, 0);
+
+	/*
+	 * Check whether we have a stuck measurement. The enforcement
+	 * is performed after the stuck value has been mixed into the
+	 * entropy pool.
+	 */
+	jent_stuck(ec, current_delta);
+
+	return data;
+}
+
+/**
+ * Von Neuman unbias as explained in RFC 4086 section 4.2. As shown in the
+ * documentation of that RNG, the bits from jent_measure_jitter are considered
+ * independent which implies that the Von Neuman unbias operation is applicable.
+ * A proof of the Von-Neumann unbias operation to remove skews is given in the
+ * document "A proposal for: Functionality classes for random number
+ * generators", version 2.0 by Werner Schindler, section 5.4.1.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ *
+ * @return One random bit
+ */
+static __u64 jent_unbiased_bit(struct rand_data *entropy_collector)
+{
+	do {
+		__u64 a = jent_measure_jitter(entropy_collector);
+		__u64 b = jent_measure_jitter(entropy_collector);
+
+		if (a == b)
+			continue;
+		if (1 == a)
+			return 1;
+		else
+			return 0;
+	} while (1);
+}
+
+/**
+ * Shuffle the pool a bit by mixing some value with a bijective function (XOR)
+ * into the pool.
+ *
+ * The function generates a mixer value that depends on the bits set and the
+ * location of the set bits in the random number generated by the entropy
+ * source. Therefore, based on the generated random number, this mixer value
+ * can have 2**64 different values. That mixer value is initialized with the
+ * first two SHA-1 constants. After obtaining the mixer value, it is XORed into
+ * the random number.
+ *
+ * The mixer value is not assumed to contain any entropy. But due to the XOR
+ * operation, it can also not destroy any entropy present in the entropy pool.
+ *
+ * Input:
+ * @entropy_collector Reference to entropy collector
+ */
+static void jent_stir_pool(struct rand_data *entropy_collector)
+{
+	/*
+	 * to shut up GCC on 32 bit, we have to initialize the 64 variable
+	 * with two 32 bit variables
+	 */
+	union c {
+		__u64 u64;
+		__u32 u32[2];
+	};
+	/*
+	 * This constant is derived from the first two 32 bit initialization
+	 * vectors of SHA-1 as defined in FIPS 180-4 section 5.3.1
+	 */
+	union c constant;
+	/*
+	 * The start value of the mixer variable is derived from the third
+	 * and fourth 32 bit initialization vector of SHA-1 as defined in
+	 * FIPS 180-4 section 5.3.1
+	 */
+	union c mixer;
+	unsigned int i = 0;
+
+	/*
+	 * Store the SHA-1 constants in reverse order to make up the 64 bit
+	 * value -- this applies to a little endian system, on a big endian
+	 * system, it reverses as expected. But this really does not matter
+	 * as we do not rely on the specific numbers. We just pick the SHA-1
+	 * constants as they have a good mix of bit set and unset.
+	 */
+	constant.u32[1] = 0x67452301;
+	constant.u32[0] = 0xefcdab89;
+	mixer.u32[1] = 0x98badcfe;
+	mixer.u32[0] = 0x10325476;
+
+	for (i = 0; i < DATA_SIZE_BITS; i++) {
+		/*
+		 * get the i-th bit of the input random number and only XOR
+		 * the constant into the mixer value when that bit is set
+		 */
+		if ((entropy_collector->data >> i) & 1)
+			mixer.u64 ^= constant.u64;
+		mixer.u64 = rol64(mixer.u64, 1);
+	}
+	entropy_collector->data ^= mixer.u64;
+}
+
+/**
+ * Generator of one 64 bit random number
+ * Function fills rand_data->data
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ */
+static void jent_gen_entropy(struct rand_data *ec)
+{
+	unsigned int k = 0;
+
+	/* priming of the ->prev_time value */
+	jent_measure_jitter(ec);
+
+	while (1) {
+		__u64 data = 0;
+
+		if (ec->disable_unbias == 1)
+			data = jent_measure_jitter(ec);
+		else
+			data = jent_unbiased_bit(ec);
+
+		/* enforcement of the jent_stuck test */
+		if (ec->stuck) {
+			/*
+			 * We only mix in the bit considered not appropriate
+			 * without the LSFR. The reason is that if we apply
+			 * the LSFR and we do not rotate, the 2nd bit with LSFR
+			 * will cancel out the first LSFR application on the
+			 * bad bit.
+			 *
+			 * And we do not rotate as we apply the next bit to the
+			 * current bit location again.
+			 */
+			ec->data ^= data;
+			ec->stuck = 0;
+			continue;
+		}
+
+		/*
+		 * Fibonacci LSFR with polynom of
+		 *  x^64 + x^61 + x^56 + x^31 + x^28 + x^23 + 1 which is
+		 *  primitive according to
+		 *   http://poincare.matf.bg.ac.rs/~ezivkovm/publications/primpol1.pdf
+		 * (the shift values are the polynom values minus one
+		 * due to counting bits from 0 to 63). As the current
+		 * position is always the LSB, the polynom only needs
+		 * to shift data in from the left without wrap.
+		 */
+		ec->data ^= data;
+		ec->data ^= ((ec->data >> 63) & 1);
+		ec->data ^= ((ec->data >> 60) & 1);
+		ec->data ^= ((ec->data >> 55) & 1);
+		ec->data ^= ((ec->data >> 30) & 1);
+		ec->data ^= ((ec->data >> 27) & 1);
+		ec->data ^= ((ec->data >> 22) & 1);
+		ec->data = rol64(ec->data, 1);
+
+		/*
+		 * We multiply the loop value with ->osr to obtain the
+		 * oversampling rate requested by the caller
+		 */
+		if (++k >= (DATA_SIZE_BITS * ec->osr))
+			break;
+	}
+	if (ec->stir)
+		jent_stir_pool(ec);
+}
+
+/**
+ * The continuous test required by FIPS 140-2 -- the function automatically
+ * primes the test if needed.
+ *
+ * Return:
+ * 0 if FIPS test passed
+ * < 0 if FIPS test failed
+ */
+static void jent_fips_test(struct rand_data *ec)
+{
+	if (!fips_enabled)
+		return;
+
+	/* prime the FIPS test */
+	if (!ec->old_data) {
+		ec->old_data = ec->data;
+		jent_gen_entropy(ec);
+	}
+
+	if (ec->data == ec->old_data)
+		panic(DRIVER_NAME ": Duplicate output detected\n");
+
+	ec->old_data = ec->data;
+}
+
+
+/**
+ * Entry function: Obtain entropy for the caller.
+ *
+ * This function invokes the entropy gathering logic as often to generate
+ * as many bytes as requested by the caller. The entropy gathering logic
+ * creates 64 bit per invocation.
+ *
+ * This function truncates the last 64 bit entropy value output to the exact
+ * size specified by the caller.
+ *
+ * Input:
+ * @ec Reference to entropy collector
+ * @data pointer to buffer for storing random data -- buffer must already
+ *	 exist
+ * @len size of the buffer, specifying also the requested number of random
+ *	in bytes
+ *
+ * @return 0 when request is fulfilled or an error
+ *
+ * The following error codes can occur:
+ *	-1	entropy_collector is NULL
+ */
+static ssize_t jent_read_entropy(struct rand_data *ec, u8 *data, size_t len)
+{
+	u8 *p = data;
+
+	if (!ec)
+		return -EINVAL;
+
+	while (0 < len) {
+		size_t tocopy;
+
+		jent_gen_entropy(ec);
+		jent_fips_test(ec);
+		if ((DATA_SIZE_BITS / 8) < len)
+			tocopy = (DATA_SIZE_BITS / 8);
+		else
+			tocopy = len;
+		memcpy(p, &ec->data, tocopy);
+
+		len -= tocopy;
+		p += tocopy;
+	}
+
+	return 0;
+}
+
+/***************************************************************************
+ * Initialization logic
+ ***************************************************************************/
+
+static struct rand_data *jent_entropy_collector_alloc(unsigned int osr,
+						      unsigned int flags)
+{
+	struct rand_data *entropy_collector;
+
+	entropy_collector = kzalloc(sizeof(struct rand_data), GFP_KERNEL);
+	if (!entropy_collector)
+		return NULL;
+
+	if (!(flags & JENT_DISABLE_MEMORY_ACCESS)) {
+		/* Allocate memory for adding variations based on memory
+		 * access
+		 */
+		entropy_collector->mem = kzalloc(JENT_MEMORY_SIZE, GFP_KERNEL);
+		if (!entropy_collector->mem) {
+			kfree(entropy_collector);
+			return NULL;
+		}
+		entropy_collector->memblocksize = JENT_MEMORY_BLOCKSIZE;
+		entropy_collector->memblocks = JENT_MEMORY_BLOCKS;
+		entropy_collector->memaccessloops = JENT_MEMORY_ACCESSLOOPS;
+	}
+
+	/* verify and set the oversampling rate */
+	if (0 == osr)
+		osr = 1; /* minimum sampling rate is 1 */
+	entropy_collector->osr = osr;
+
+	entropy_collector->stir = 1;
+	if (flags & JENT_DISABLE_STIR)
+		entropy_collector->stir = 0;
+	if (flags & JENT_DISABLE_UNBIAS)
+		entropy_collector->disable_unbias = 1;
+
+	/* fill the data pad with non-zero values */
+	jent_gen_entropy(entropy_collector);
+
+	return entropy_collector;
+}
+
+static void jent_entropy_collector_free(struct rand_data *entropy_collector)
+{
+	if (entropy_collector->mem)
+		kzfree(entropy_collector->mem);
+	entropy_collector->mem = NULL;
+	if (entropy_collector)
+		kzfree(entropy_collector);
+	entropy_collector = NULL;
+}
+
+static int jent_entropy_init(void)
+{
+	int i;
+	__u64 delta_sum = 0;
+	__u64 old_delta = 0;
+	int time_backwards = 0;
+	int count_var = 0;
+	int count_mod = 0;
+
+	/* We could perform statistical tests here, but the problem is
+	 * that we only have a few loop counts to do testing. These
+	 * loop counts may show some slight skew and we produce
+	 * false positives.
+	 *
+	 * Moreover, only old systems show potentially problematic
+	 * jitter entropy that could potentially be caught here. But
+	 * the RNG is intended for hardware that is available or widely
+	 * used, but not old systems that are long out of favor. Thus,
+	 * no statistical tests.
+	 */
+
+	/*
+	 * We could add a check for system capabilities such as clock_getres or
+	 * check for CONFIG_X86_TSC, but it does not make much sense as the
+	 * following sanity checks verify that we have a high-resolution
+	 * timer.
+	 */
+	/*
+	 * TESTLOOPCOUNT needs some loops to identify edge systems. 100 is
+	 * definitely too little.
+	 */
+#define TESTLOOPCOUNT 300
+#define CLEARCACHE 100
+	for (i = 0; (TESTLOOPCOUNT + CLEARCACHE) > i; i++) {
+		__u64 time = 0;
+		__u64 time2 = 0;
+		__u64 folded = 0;
+		__u64 delta = 0;
+		unsigned int lowdelta = 0;
+
+		jent_get_nstime(&time);
+		jent_fold_time(NULL, time, &folded, 1<<MIN_FOLD_LOOP_BIT);
+		jent_get_nstime(&time2);
+
+		/* test whether timer works */
+		if (!time || !time2)
+			return JENT_ENOTIME;
+		delta = time2 - time;
+		/*
+		 * test whether timer is fine grained enough to provide
+		 * delta even when called shortly after each other -- this
+		 * implies that we also have a high resolution timer
+		 */
+		if (!delta)
+			return JENT_ECOARSETIME;
+
+		/*
+		 * up to here we did not modify any variable that will be
+		 * evaluated later, but we already performed some work. Thus we
+		 * already have had an impact on the caches, branch prediction,
+		 * etc. with the goal to clear it to get the worst case
+		 * measurements.
+		 */
+		if (CLEARCACHE > i)
+			continue;
+
+		/* test whether we have an increasing timer */
+		if (!(time2 > time))
+			time_backwards++;
+
+		/*
+		 * Avoid modulo of 64 bit integer to allow code to compile
+		 * on 32 bit architectures.
+		 */
+		lowdelta = time2 - time;
+		if (!(lowdelta % 100))
+			count_mod++;
+
+		/*
+		 * ensure that we have a varying delta timer which is necessary
+		 * for the calculation of entropy -- perform this check
+		 * only after the first loop is executed as we need to prime
+		 * the old_data value
+		 */
+		if (i) {
+			if (delta != old_delta)
+				count_var++;
+			if (delta > old_delta)
+				delta_sum += (delta - old_delta);
+			else
+				delta_sum += (old_delta - delta);
+		}
+		old_delta = delta;
+	}
+
+	/*
+	 * we allow up to three times the time running backwards.
+	 * CLOCK_REALTIME is affected by adjtime and NTP operations. Thus,
+	 * if such an operation just happens to interfere with our test, it
+	 * should not fail. The value of 3 should cover the NTP case being
+	 * performed during our test run.
+	 */
+	if (3 < time_backwards)
+		return JENT_ENOMONOTONIC;
+	/* Error if the time variances are always identical */
+	if (!delta_sum)
+		return JENT_EVARVAR;
+
+	/*
+	 * Variations of deltas of time must on average be larger
+	 * than 1 to ensure the entropy estimation
+	 * implied with 1 is preserved
+	 */
+	if (delta_sum <= 1)
+		return JENT_EMINVARVAR;
+
+	/*
+	 * Ensure that we have variations in the time stamp below 10 for at
+	 * least 10% of all checks -- on some platforms, the counter
+	 * increments in multiples of 100, but not always
+	 */
+	if ((TESTLOOPCOUNT/10 * 9) < count_mod)
+		return JENT_ECOARSETIME;
+
+	return 0;
+}
+
+/***************************************************************************
+ * Kernel crypto API interface
+ ***************************************************************************/
+
+struct jitterentropy {
+	spinlock_t jent_lock;
+	struct rand_data *entropy_collector;
+};
+
+static int jent_kcapi_init(struct crypto_tfm *tfm)
+{
+	struct jitterentropy *rng = crypto_tfm_ctx(tfm);
+	int ret = 0;
+
+	rng->entropy_collector = jent_entropy_collector_alloc(1, 0);
+	if (!rng->entropy_collector)
+		ret = -ENOMEM;
+
+	spin_lock_init(&rng->jent_lock);
+	return ret;
+}
+
+static void jent_kcapi_cleanup(struct crypto_tfm *tfm)
+{
+	struct jitterentropy *rng = crypto_tfm_ctx(tfm);
+
+	spin_lock(&rng->jent_lock);
+	if (rng->entropy_collector)
+		jent_entropy_collector_free(rng->entropy_collector);
+	rng->entropy_collector = NULL;
+	spin_unlock(&rng->jent_lock);
+}
+
+static int jent_kcapi_random(struct crypto_rng *tfm,
+			     const u8 *src, unsigned int slen,
+			     u8 *rdata, unsigned int dlen)
+{
+	struct jitterentropy *rng = crypto_rng_ctx(tfm);
+	int ret = 0;
+
+	spin_lock(&rng->jent_lock);
+	ret = jent_read_entropy(rng->entropy_collector, rdata, dlen);
+	spin_unlock(&rng->jent_lock);
+
+	return ret;
+}
+
+static int jent_kcapi_reset(struct crypto_rng *tfm,
+			    const u8 *seed, unsigned int slen)
+{
+	return 0;
+}
+
+static struct rng_alg jent_alg = {
+	.generate		= jent_kcapi_random,
+	.seed			= jent_kcapi_reset,
+	.seedsize		= 0,
+	.base			= {
+		.cra_name               = "jitterentropy_rng",
+		.cra_driver_name        = "jitterentropy_rng",
+		.cra_priority           = 100,
+		.cra_ctxsize            = sizeof(struct jitterentropy),
+		.cra_module             = THIS_MODULE,
+		.cra_init               = jent_kcapi_init,
+		.cra_exit               = jent_kcapi_cleanup,
+
+	}
+};
+
+static int __init jent_mod_init(void)
+{
+	int ret = 0;
+
+	ret = jent_entropy_init();
+	if (ret) {
+		pr_info(DRIVER_NAME ": Initialization failed with host not compliant with requirements: %d\n", ret);
+		return -EFAULT;
+	}
+	return crypto_register_rng(&jent_alg);
+}
+
+static void __exit jent_mod_exit(void)
+{
+	crypto_unregister_rng(&jent_alg);
+}
+
+module_init(jent_mod_init);
+module_exit(jent_mod_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
+MODULE_DESCRIPTION("Non-physical True Random Number Generator based on CPU Jitter");
+MODULE_ALIAS_CRYPTO("jitterentropy_rng");
diff --git a/crypto/krng.c b/crypto/krng.c
index 0224841..40ed78e 100644
--- a/crypto/krng.c
+++ b/crypto/krng.c
@@ -16,31 +16,27 @@
 #include <linux/module.h>
 #include <linux/random.h>
 
-static int krng_get_random(struct crypto_rng *tfm, u8 *rdata, unsigned int dlen)
+static int krng_generate(struct crypto_rng *tfm,
+			 const u8 *src, unsigned int slen,
+			 u8 *rdata, unsigned int dlen)
 {
 	get_random_bytes(rdata, dlen);
 	return 0;
 }
 
-static int krng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static int krng_seed(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 {
 	return 0;
 }
 
-static struct crypto_alg krng_alg = {
-	.cra_name		= "stdrng",
-	.cra_driver_name	= "krng",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
-	.cra_ctxsize		= 0,
-	.cra_type		= &crypto_rng_type,
-	.cra_module		= THIS_MODULE,
-	.cra_u			= {
-		.rng = {
-			.rng_make_random	= krng_get_random,
-			.rng_reset		= krng_reset,
-			.seedsize		= 0,
-		}
+static struct rng_alg krng_alg = {
+	.generate		= krng_generate,
+	.seed			= krng_seed,
+	.base			=	{
+		.cra_name		= "stdrng",
+		.cra_driver_name	= "krng",
+		.cra_priority		= 200,
+		.cra_module		= THIS_MODULE,
 	}
 };
 
@@ -48,13 +44,12 @@ static struct crypto_alg krng_alg = {
 /* Module initalization */
 static int __init krng_mod_init(void)
 {
-	return crypto_register_alg(&krng_alg);
+	return crypto_register_rng(&krng_alg);
 }
 
 static void __exit krng_mod_fini(void)
 {
-	crypto_unregister_alg(&krng_alg);
-	return;
+	crypto_unregister_rng(&krng_alg);
 }
 
 module_init(krng_mod_init);
diff --git a/crypto/md5.c b/crypto/md5.c
index 36f5e5b..33d17e9 100644
--- a/crypto/md5.c
+++ b/crypto/md5.c
@@ -51,10 +51,10 @@ static int md5_init(struct shash_desc *desc)
 {
 	struct md5_state *mctx = shash_desc_ctx(desc);
 
-	mctx->hash[0] = 0x67452301;
-	mctx->hash[1] = 0xefcdab89;
-	mctx->hash[2] = 0x98badcfe;
-	mctx->hash[3] = 0x10325476;
+	mctx->hash[0] = MD5_H0;
+	mctx->hash[1] = MD5_H1;
+	mctx->hash[2] = MD5_H2;
+	mctx->hash[3] = MD5_H3;
 	mctx->byte_count = 0;
 
 	return 0;
diff --git a/crypto/pcompress.c b/crypto/pcompress.c
index 7140fe7..7a13b40 100644
--- a/crypto/pcompress.c
+++ b/crypto/pcompress.c
@@ -38,11 +38,6 @@ static int crypto_pcomp_init(struct crypto_tfm *tfm, u32 type, u32 mask)
 	return 0;
 }
 
-static unsigned int crypto_pcomp_extsize(struct crypto_alg *alg)
-{
-	return alg->cra_ctxsize;
-}
-
 static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm)
 {
 	return 0;
@@ -77,7 +72,7 @@ static void crypto_pcomp_show(struct seq_file *m, struct crypto_alg *alg)
 }
 
 static const struct crypto_type crypto_pcomp_type = {
-	.extsize	= crypto_pcomp_extsize,
+	.extsize	= crypto_alg_extsize,
 	.init		= crypto_pcomp_init,
 	.init_tfm	= crypto_pcomp_init_tfm,
 #ifdef CONFIG_PROC_FS
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index c305d41..ff174b6 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -20,6 +20,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
+#include <linux/atomic.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -60,8 +61,8 @@ static struct padata_pcrypt pdecrypt;
 static struct kset           *pcrypt_kset;
 
 struct pcrypt_instance_ctx {
-	struct crypto_spawn spawn;
-	unsigned int tfm_count;
+	struct crypto_aead_spawn spawn;
+	atomic_t tfm_count;
 };
 
 struct pcrypt_aead_ctx {
@@ -278,9 +279,8 @@ static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm)
 	struct pcrypt_aead_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct crypto_aead *cipher;
 
-	ictx->tfm_count++;
-
-	cpu_index = ictx->tfm_count % cpumask_weight(cpu_online_mask);
+	cpu_index = (unsigned int)atomic_inc_return(&ictx->tfm_count) %
+		    cpumask_weight(cpu_online_mask);
 
 	ctx->cb_cpu = cpumask_first(cpu_online_mask);
 	for (cpu = 0; cpu < cpu_index; cpu++)
@@ -292,9 +292,10 @@ static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm)
 		return PTR_ERR(cipher);
 
 	ctx->child = cipher;
-	tfm->crt_aead.reqsize = sizeof(struct pcrypt_request)
-		+ sizeof(struct aead_givcrypt_request)
-		+ crypto_aead_reqsize(cipher);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct pcrypt_request) +
+		sizeof(struct aead_givcrypt_request) +
+		crypto_aead_reqsize(cipher));
 
 	return 0;
 }
@@ -306,57 +307,50 @@ static void pcrypt_aead_exit_tfm(struct crypto_tfm *tfm)
 	crypto_free_aead(ctx->child);
 }
 
-static struct crypto_instance *pcrypt_alloc_instance(struct crypto_alg *alg)
+static int pcrypt_init_instance(struct crypto_instance *inst,
+				struct crypto_alg *alg)
 {
-	struct crypto_instance *inst;
-	struct pcrypt_instance_ctx *ctx;
-	int err;
-
-	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
-	if (!inst) {
-		inst = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-
-	err = -ENAMETOOLONG;
 	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
 		     "pcrypt(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
-		goto out_free_inst;
+		return -ENAMETOOLONG;
 
 	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
 
-	ctx = crypto_instance_ctx(inst);
-	err = crypto_init_spawn(&ctx->spawn, alg, inst,
-				CRYPTO_ALG_TYPE_MASK);
-	if (err)
-		goto out_free_inst;
-
 	inst->alg.cra_priority = alg->cra_priority + 100;
 	inst->alg.cra_blocksize = alg->cra_blocksize;
 	inst->alg.cra_alignmask = alg->cra_alignmask;
 
-out:
-	return inst;
-
-out_free_inst:
-	kfree(inst);
-	inst = ERR_PTR(err);
-	goto out;
+	return 0;
 }
 
 static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb,
 						 u32 type, u32 mask)
 {
+	struct pcrypt_instance_ctx *ctx;
 	struct crypto_instance *inst;
 	struct crypto_alg *alg;
+	const char *name;
+	int err;
+
+	name = crypto_attr_alg_name(tb[1]);
+	if (IS_ERR(name))
+		return ERR_CAST(name);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	ctx = crypto_instance_ctx(inst);
+	crypto_set_aead_spawn(&ctx->spawn, inst);
 
-	alg = crypto_get_attr_alg(tb, type, (mask & CRYPTO_ALG_TYPE_MASK));
-	if (IS_ERR(alg))
-		return ERR_CAST(alg);
+	err = crypto_grab_aead(&ctx->spawn, name, 0, 0);
+	if (err)
+		goto out_free_inst;
 
-	inst = pcrypt_alloc_instance(alg);
-	if (IS_ERR(inst))
-		goto out_put_alg;
+	alg = crypto_aead_spawn_alg(&ctx->spawn);
+	err = pcrypt_init_instance(inst, alg);
+	if (err)
+		goto out_drop_aead;
 
 	inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
 	inst->alg.cra_type = &crypto_aead_type;
@@ -376,9 +370,15 @@ static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb,
 	inst->alg.cra_aead.decrypt = pcrypt_aead_decrypt;
 	inst->alg.cra_aead.givencrypt = pcrypt_aead_givencrypt;
 
-out_put_alg:
-	crypto_mod_put(alg);
+out:
 	return inst;
+
+out_drop_aead:
+	crypto_drop_aead(&ctx->spawn);
+out_free_inst:
+	kfree(inst);
+	inst = ERR_PTR(err);
+	goto out;
 }
 
 static struct crypto_instance *pcrypt_alloc(struct rtattr **tb)
@@ -401,7 +401,7 @@ static void pcrypt_free(struct crypto_instance *inst)
 {
 	struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
 
-	crypto_drop_spawn(&ctx->spawn);
+	crypto_drop_aead(&ctx->spawn);
 	kfree(inst);
 }
 
diff --git a/crypto/proc.c b/crypto/proc.c
index 4ffe73b..2cc10c9 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -20,47 +20,8 @@
 #include <linux/rwsem.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <linux/sysctl.h>
 #include "internal.h"
 
-#ifdef CONFIG_CRYPTO_FIPS
-static struct ctl_table crypto_sysctl_table[] = {
-	{
-		.procname       = "fips_enabled",
-		.data           = &fips_enabled,
-		.maxlen         = sizeof(int),
-		.mode           = 0444,
-		.proc_handler   = proc_dointvec
-	},
-	{}
-};
-
-static struct ctl_table crypto_dir_table[] = {
-	{
-		.procname       = "crypto",
-		.mode           = 0555,
-		.child          = crypto_sysctl_table
-	},
-	{}
-};
-
-static struct ctl_table_header *crypto_sysctls;
-
-static void crypto_proc_fips_init(void)
-{
-	crypto_sysctls = register_sysctl_table(crypto_dir_table);
-}
-
-static void crypto_proc_fips_exit(void)
-{
-	if (crypto_sysctls)
-		unregister_sysctl_table(crypto_sysctls);
-}
-#else
-#define crypto_proc_fips_init()
-#define crypto_proc_fips_exit()
-#endif
-
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
 	down_read(&crypto_alg_sem);
@@ -148,11 +109,9 @@ static const struct file_operations proc_crypto_ops = {
 void __init crypto_init_proc(void)
 {
 	proc_create("crypto", 0, NULL, &proc_crypto_ops);
-	crypto_proc_fips_init();
 }
 
 void __exit crypto_exit_proc(void)
 {
-	crypto_proc_fips_exit();
 	remove_proc_entry("crypto", NULL);
 }
diff --git a/crypto/rng.c b/crypto/rng.c
index e0a25c2..1315505 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -4,6 +4,7 @@
  * RNG operations.
  *
  * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com>
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -24,12 +25,19 @@
 #include <linux/cryptouser.h>
 #include <net/netlink.h>
 
+#include "internal.h"
+
 static DEFINE_MUTEX(crypto_default_rng_lock);
 struct crypto_rng *crypto_default_rng;
 EXPORT_SYMBOL_GPL(crypto_default_rng);
 static int crypto_default_rng_refcnt;
 
-static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_rng, base);
+}
+
+int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 {
 	u8 *buf = NULL;
 	int err;
@@ -43,21 +51,23 @@ static int rngapi_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
 		seed = buf;
 	}
 
-	err = crypto_rng_alg(tfm)->rng_reset(tfm, seed, slen);
+	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
 
-	kfree(buf);
+	kzfree(buf);
 	return err;
 }
+EXPORT_SYMBOL_GPL(crypto_rng_reset);
 
-static int crypto_init_rng_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
+static int crypto_rng_init_tfm(struct crypto_tfm *tfm)
 {
-	struct rng_alg *alg = &tfm->__crt_alg->cra_rng;
-	struct rng_tfm *ops = &tfm->crt_rng;
+	return 0;
+}
 
-	ops->rng_gen_random = alg->rng_make_random;
-	ops->rng_reset = rngapi_reset;
+static unsigned int seedsize(struct crypto_alg *alg)
+{
+	struct rng_alg *ralg = container_of(alg, struct rng_alg, base);
 
-	return 0;
+	return ralg->seedsize;
 }
 
 #ifdef CONFIG_NET
@@ -67,7 +77,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
 
 	strncpy(rrng.type, "rng", sizeof(rrng.type));
 
-	rrng.seedsize = alg->cra_rng.seedsize;
+	rrng.seedsize = seedsize(alg);
 
 	if (nla_put(skb, CRYPTOCFGA_REPORT_RNG,
 		    sizeof(struct crypto_report_rng), &rrng))
@@ -89,24 +99,27 @@ static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
 static void crypto_rng_show(struct seq_file *m, struct crypto_alg *alg)
 {
 	seq_printf(m, "type         : rng\n");
-	seq_printf(m, "seedsize     : %u\n", alg->cra_rng.seedsize);
-}
-
-static unsigned int crypto_rng_ctxsize(struct crypto_alg *alg, u32 type,
-				       u32 mask)
-{
-	return alg->cra_ctxsize;
+	seq_printf(m, "seedsize     : %u\n", seedsize(alg));
 }
 
-const struct crypto_type crypto_rng_type = {
-	.ctxsize = crypto_rng_ctxsize,
-	.init = crypto_init_rng_ops,
+static const struct crypto_type crypto_rng_type = {
+	.extsize = crypto_alg_extsize,
+	.init_tfm = crypto_rng_init_tfm,
 #ifdef CONFIG_PROC_FS
 	.show = crypto_rng_show,
 #endif
 	.report = crypto_rng_report,
+	.maskclear = ~CRYPTO_ALG_TYPE_MASK,
+	.maskset = CRYPTO_ALG_TYPE_MASK,
+	.type = CRYPTO_ALG_TYPE_RNG,
+	.tfmsize = offsetof(struct crypto_rng, base),
 };
-EXPORT_SYMBOL_GPL(crypto_rng_type);
+
+struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask)
+{
+	return crypto_alloc_tfm(alg_name, &crypto_rng_type, type, mask);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_rng);
 
 int crypto_get_default_rng(void)
 {
@@ -150,5 +163,55 @@ void crypto_put_default_rng(void)
 }
 EXPORT_SYMBOL_GPL(crypto_put_default_rng);
 
+int crypto_register_rng(struct rng_alg *alg)
+{
+	struct crypto_alg *base = &alg->base;
+
+	if (alg->seedsize > PAGE_SIZE / 8)
+		return -EINVAL;
+
+	base->cra_type = &crypto_rng_type;
+	base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK;
+	base->cra_flags |= CRYPTO_ALG_TYPE_RNG;
+
+	return crypto_register_alg(base);
+}
+EXPORT_SYMBOL_GPL(crypto_register_rng);
+
+void crypto_unregister_rng(struct rng_alg *alg)
+{
+	crypto_unregister_alg(&alg->base);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_rng);
+
+int crypto_register_rngs(struct rng_alg *algs, int count)
+{
+	int i, ret;
+
+	for (i = 0; i < count; i++) {
+		ret = crypto_register_rng(algs + i);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	for (--i; i >= 0; --i)
+		crypto_unregister_rng(algs + i);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(crypto_register_rngs);
+
+void crypto_unregister_rngs(struct rng_alg *algs, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_rng(algs + i);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_rngs);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Random Number Generator");
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 3bd749c..8690324 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -104,22 +104,18 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 			      unsigned int start, unsigned int nbytes, int out)
 {
 	struct scatter_walk walk;
-	unsigned int offset = 0;
+	struct scatterlist tmp[2];
 
 	if (!nbytes)
 		return;
 
-	for (;;) {
-		scatterwalk_start(&walk, sg);
-
-		if (start < offset + sg->length)
-			break;
+	sg = scatterwalk_ffwd(tmp, sg, start);
 
-		offset += sg->length;
-		sg = sg_next(sg);
-	}
+	if (sg_page(sg) == virt_to_page(buf) &&
+	    sg->offset == offset_in_page(buf))
+		return;
 
-	scatterwalk_advance(&walk, start - offset);
+	scatterwalk_start(&walk, sg);
 	scatterwalk_copychunks(buf, &walk, nbytes, out);
 	scatterwalk_done(&walk, out, 0);
 }
@@ -146,3 +142,25 @@ int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes)
 	return n;
 }
 EXPORT_SYMBOL_GPL(scatterwalk_bytes_sglen);
+
+struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
+				     struct scatterlist *src,
+				     unsigned int len)
+{
+	for (;;) {
+		if (!len)
+			return src;
+
+		if (src->length > len)
+			break;
+
+		len -= src->length;
+		src = sg_next(src);
+	}
+
+	sg_set_page(dst, sg_page(src), src->length - len, src->offset + len);
+	scatterwalk_crypto_chain(dst, sg_next(src), 0, 2);
+
+	return dst;
+}
+EXPORT_SYMBOL_GPL(scatterwalk_ffwd);
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index b7bb9a2..127970a 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -15,7 +15,9 @@
 
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/null.h>
 #include <crypto/rng.h>
+#include <crypto/scatterwalk.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -24,11 +26,41 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+struct seqniv_request_ctx {
+	struct scatterlist dst[2];
+	struct aead_request subreq;
+};
+
 struct seqiv_ctx {
 	spinlock_t lock;
 	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
 };
 
+struct seqiv_aead_ctx {
+	struct crypto_aead *child;
+	spinlock_t lock;
+	struct crypto_blkcipher *null;
+	u8 salt[] __attribute__ ((aligned(__alignof__(u32))));
+};
+
+static void seqiv_free(struct crypto_instance *inst);
+
+static int seqiv_aead_setkey(struct crypto_aead *tfm,
+			     const u8 *key, unsigned int keylen)
+{
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setkey(ctx->child, key, keylen);
+}
+
+static int seqiv_aead_setauthsize(struct crypto_aead *tfm,
+				  unsigned int authsize)
+{
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	return crypto_aead_setauthsize(ctx->child, authsize);
+}
+
 static void seqiv_complete2(struct skcipher_givcrypt_request *req, int err)
 {
 	struct ablkcipher_request *subreq = skcipher_givcrypt_reqctx(req);
@@ -81,6 +113,77 @@ static void seqiv_aead_complete(struct crypto_async_request *base, int err)
 	aead_givcrypt_complete(req, err);
 }
 
+static void seqiv_aead_encrypt_complete2(struct aead_request *req, int err)
+{
+	struct aead_request *subreq = aead_request_ctx(req);
+	struct crypto_aead *geniv;
+
+	if (err == -EINPROGRESS)
+		return;
+
+	if (err)
+		goto out;
+
+	geniv = crypto_aead_reqtfm(req);
+	memcpy(req->iv, subreq->iv, crypto_aead_ivsize(geniv));
+
+out:
+	kzfree(subreq->iv);
+}
+
+static void seqiv_aead_encrypt_complete(struct crypto_async_request *base,
+					int err)
+{
+	struct aead_request *req = base->data;
+
+	seqiv_aead_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static void seqniv_aead_encrypt_complete2(struct aead_request *req, int err)
+{
+	unsigned int ivsize = 8;
+	u8 data[20];
+
+	if (err == -EINPROGRESS)
+		return;
+
+	/* Swap IV and ESP header back to correct order. */
+	scatterwalk_map_and_copy(data, req->dst, 0, req->assoclen + ivsize, 0);
+	scatterwalk_map_and_copy(data + ivsize, req->dst, 0, req->assoclen, 1);
+	scatterwalk_map_and_copy(data, req->dst, req->assoclen, ivsize, 1);
+}
+
+static void seqniv_aead_encrypt_complete(struct crypto_async_request *base,
+					int err)
+{
+	struct aead_request *req = base->data;
+
+	seqniv_aead_encrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
+static void seqniv_aead_decrypt_complete2(struct aead_request *req, int err)
+{
+	u8 data[4];
+
+	if (err == -EINPROGRESS)
+		return;
+
+	/* Move ESP header back to correct location. */
+	scatterwalk_map_and_copy(data, req->dst, 16, req->assoclen - 8, 0);
+	scatterwalk_map_and_copy(data, req->dst, 8, req->assoclen - 8, 1);
+}
+
+static void seqniv_aead_decrypt_complete(struct crypto_async_request *base,
+					 int err)
+{
+	struct aead_request *req = base->data;
+
+	seqniv_aead_decrypt_complete2(req, err);
+	aead_request_complete(req, err);
+}
+
 static void seqiv_geniv(struct seqiv_ctx *ctx, u8 *info, u64 seq,
 			unsigned int ivsize)
 {
@@ -186,6 +289,228 @@ static int seqiv_aead_givencrypt(struct aead_givcrypt_request *req)
 	return err;
 }
 
+static int seqiv_aead_encrypt_compat(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct seqniv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq;
+	struct scatterlist *dst;
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+	u8 buf[20] __attribute__ ((aligned(__alignof__(u32))));
+	int err;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	/* ESP AD is at most 12 bytes (ESN). */
+	if (req->assoclen > 12)
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = seqniv_aead_encrypt_complete;
+	data = req;
+
+	if (req->src != req->dst) {
+		struct scatterlist srcbuf[2];
+		struct scatterlist dstbuf[2];
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(
+			&desc,
+			scatterwalk_ffwd(dstbuf, req->dst,
+					 req->assoclen + ivsize),
+			scatterwalk_ffwd(srcbuf, req->src,
+					 req->assoclen + ivsize),
+			req->cryptlen - ivsize);
+		if (err)
+			return err;
+	}
+
+	dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize);
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, dst, dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	memcpy(buf, req->iv, ivsize);
+	crypto_xor(buf, ctx->salt, ivsize);
+	memcpy(req->iv, buf, ivsize);
+
+	/* Swap order of IV and ESP AD for ICV generation. */
+	scatterwalk_map_and_copy(buf + ivsize, req->dst, 0, req->assoclen, 0);
+	scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 1);
+
+	err = crypto_aead_encrypt(subreq);
+	seqniv_aead_encrypt_complete2(req, err);
+	return err;
+}
+
+static int seqiv_aead_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	u8 *info;
+	unsigned int ivsize = 8;
+	int err;
+
+	if (req->cryptlen < ivsize)
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+	info = req->iv;
+
+	if (req->src != req->dst) {
+		struct scatterlist src[2];
+		struct scatterlist dst[2];
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(
+			&desc,
+			scatterwalk_ffwd(dst, req->dst,
+					 req->assoclen + ivsize),
+			scatterwalk_ffwd(src, req->src,
+					 req->assoclen + ivsize),
+			req->cryptlen - ivsize);
+		if (err)
+			return err;
+	}
+
+	if (unlikely(!IS_ALIGNED((unsigned long)info,
+				 crypto_aead_alignmask(geniv) + 1))) {
+		info = kmalloc(ivsize, req->base.flags &
+				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
+								  GFP_ATOMIC);
+		if (!info)
+			return -ENOMEM;
+
+		memcpy(info, req->iv, ivsize);
+		compl = seqiv_aead_encrypt_complete;
+		data = req;
+	}
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->dst, req->dst,
+			       req->cryptlen - ivsize, info);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	crypto_xor(info, ctx->salt, ivsize);
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+
+	err = crypto_aead_encrypt(subreq);
+	if (unlikely(info != req->iv))
+		seqiv_aead_encrypt_complete2(req, err);
+	return err;
+}
+
+static int seqiv_aead_decrypt_compat(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct seqniv_request_ctx *rctx = aead_request_ctx(req);
+	struct aead_request *subreq = &rctx->subreq;
+	struct scatterlist *dst;
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+	u8 buf[20];
+	int err;
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	if (req->assoclen > 12)
+		return -EINVAL;
+	else if (req->assoclen > 8) {
+		compl = seqniv_aead_decrypt_complete;
+		data = req;
+	}
+
+	if (req->src != req->dst) {
+		struct scatterlist srcbuf[2];
+		struct scatterlist dstbuf[2];
+		struct blkcipher_desc desc = {
+			.tfm = ctx->null,
+		};
+
+		err = crypto_blkcipher_encrypt(
+			&desc,
+			scatterwalk_ffwd(dstbuf, req->dst,
+					 req->assoclen + ivsize),
+			scatterwalk_ffwd(srcbuf, req->src,
+					 req->assoclen + ivsize),
+			req->cryptlen - ivsize);
+		if (err)
+			return err;
+	}
+
+	/* Move ESP AD forward for ICV generation. */
+	scatterwalk_map_and_copy(buf, req->dst, 0, req->assoclen + ivsize, 0);
+	memcpy(req->iv, buf + req->assoclen, ivsize);
+	scatterwalk_map_and_copy(buf, req->dst, ivsize, req->assoclen, 1);
+
+	dst = scatterwalk_ffwd(rctx->dst, req->dst, ivsize);
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, dst, dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen);
+
+	err = crypto_aead_decrypt(subreq);
+	if (req->assoclen > 8)
+		seqniv_aead_decrypt_complete2(req, err);
+	return err;
+}
+
+static int seqiv_aead_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	struct aead_request *subreq = aead_request_ctx(req);
+	crypto_completion_t compl;
+	void *data;
+	unsigned int ivsize = 8;
+
+	if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+		return -EINVAL;
+
+	aead_request_set_tfm(subreq, ctx->child);
+
+	compl = req->base.complete;
+	data = req->base.data;
+
+	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_crypt(subreq, req->src, req->dst,
+			       req->cryptlen - ivsize, req->iv);
+	aead_request_set_ad(subreq, req->assoclen + ivsize);
+
+	scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
+	if (req->src != req->dst)
+		scatterwalk_map_and_copy(req->iv, req->dst,
+					 req->assoclen, ivsize, 1);
+
+	return crypto_aead_decrypt(subreq);
+}
+
 static int seqiv_givencrypt_first(struct skcipher_givcrypt_request *req)
 {
 	struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req);
@@ -232,6 +557,52 @@ unlock:
 	return seqiv_aead_givencrypt(req);
 }
 
+static int seqiv_aead_encrypt_compat_first(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	int err = 0;
+
+	spin_lock_bh(&ctx->lock);
+	if (geniv->encrypt != seqiv_aead_encrypt_compat_first)
+		goto unlock;
+
+	geniv->encrypt = seqiv_aead_encrypt_compat;
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+
+unlock:
+	spin_unlock_bh(&ctx->lock);
+
+	if (err)
+		return err;
+
+	return seqiv_aead_encrypt_compat(req);
+}
+
+static int seqiv_aead_encrypt_first(struct aead_request *req)
+{
+	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	int err = 0;
+
+	spin_lock_bh(&ctx->lock);
+	if (geniv->encrypt != seqiv_aead_encrypt_first)
+		goto unlock;
+
+	geniv->encrypt = seqiv_aead_encrypt;
+	err = crypto_rng_get_bytes(crypto_default_rng, ctx->salt,
+				   crypto_aead_ivsize(geniv));
+
+unlock:
+	spin_unlock_bh(&ctx->lock);
+
+	if (err)
+		return err;
+
+	return seqiv_aead_encrypt(req);
+}
+
 static int seqiv_init(struct crypto_tfm *tfm)
 {
 	struct crypto_ablkcipher *geniv = __crypto_ablkcipher_cast(tfm);
@@ -244,34 +615,81 @@ static int seqiv_init(struct crypto_tfm *tfm)
 	return skcipher_geniv_init(tfm);
 }
 
-static int seqiv_aead_init(struct crypto_tfm *tfm)
+static int seqiv_old_aead_init(struct crypto_tfm *tfm)
 {
 	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
 	struct seqiv_ctx *ctx = crypto_aead_ctx(geniv);
 
 	spin_lock_init(&ctx->lock);
 
-	tfm->crt_aead.reqsize = sizeof(struct aead_request);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct aead_request));
 
 	return aead_geniv_init(tfm);
 }
 
-static struct crypto_template seqiv_tmpl;
+static int seqiv_aead_init_common(struct crypto_tfm *tfm, unsigned int reqsize)
+{
+	struct crypto_aead *geniv = __crypto_aead_cast(tfm);
+	struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
+	int err;
+
+	spin_lock_init(&ctx->lock);
+
+	crypto_aead_set_reqsize(geniv, sizeof(struct aead_request));
+
+	ctx->null = crypto_get_default_null_skcipher();
+	err = PTR_ERR(ctx->null);
+	if (IS_ERR(ctx->null))
+		goto out;
+
+	err = aead_geniv_init(tfm);
+	if (err)
+		goto drop_null;
+
+	ctx->child = geniv->child;
+	geniv->child = geniv;
+
+out:
+	return err;
 
-static struct crypto_instance *seqiv_ablkcipher_alloc(struct rtattr **tb)
+drop_null:
+	crypto_put_default_null_skcipher();
+	goto out;
+}
+
+static int seqiv_aead_init(struct crypto_tfm *tfm)
+{
+	return seqiv_aead_init_common(tfm, sizeof(struct aead_request));
+}
+
+static int seqniv_aead_init(struct crypto_tfm *tfm)
+{
+	return seqiv_aead_init_common(tfm, sizeof(struct seqniv_request_ctx));
+}
+
+static void seqiv_aead_exit(struct crypto_tfm *tfm)
+{
+	struct seqiv_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_aead(ctx->child);
+	crypto_put_default_null_skcipher();
+}
+
+static int seqiv_ablkcipher_create(struct crypto_template *tmpl,
+				   struct rtattr **tb)
 {
 	struct crypto_instance *inst;
+	int err;
 
-	inst = skcipher_geniv_alloc(&seqiv_tmpl, tb, 0, 0);
+	inst = skcipher_geniv_alloc(tmpl, tb, 0, 0);
 
 	if (IS_ERR(inst))
-		goto out;
+		return PTR_ERR(inst);
 
-	if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64)) {
-		skcipher_geniv_free(inst);
-		inst = ERR_PTR(-EINVAL);
-		goto out;
-	}
+	err = -EINVAL;
+	if (inst->alg.cra_ablkcipher.ivsize < sizeof(u64))
+		goto free_inst;
 
 	inst->alg.cra_ablkcipher.givencrypt = seqiv_givencrypt_first;
 
@@ -279,65 +697,174 @@ static struct crypto_instance *seqiv_ablkcipher_alloc(struct rtattr **tb)
 	inst->alg.cra_exit = skcipher_geniv_exit;
 
 	inst->alg.cra_ctxsize += inst->alg.cra_ablkcipher.ivsize;
+	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+
+	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
+
+	err = crypto_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
 out:
-	return inst;
+	return err;
+
+free_inst:
+	skcipher_geniv_free(inst);
+	goto out;
 }
 
-static struct crypto_instance *seqiv_aead_alloc(struct rtattr **tb)
+static int seqiv_old_aead_create(struct crypto_template *tmpl,
+				 struct aead_instance *aead)
 {
-	struct crypto_instance *inst;
-
-	inst = aead_geniv_alloc(&seqiv_tmpl, tb, 0, 0);
-
-	if (IS_ERR(inst))
-		goto out;
+	struct crypto_instance *inst = aead_crypto_instance(aead);
+	int err = -EINVAL;
 
-	if (inst->alg.cra_aead.ivsize < sizeof(u64)) {
-		aead_geniv_free(inst);
-		inst = ERR_PTR(-EINVAL);
-		goto out;
-	}
+	if (inst->alg.cra_aead.ivsize < sizeof(u64))
+		goto free_inst;
 
 	inst->alg.cra_aead.givencrypt = seqiv_aead_givencrypt_first;
 
-	inst->alg.cra_init = seqiv_aead_init;
+	inst->alg.cra_init = seqiv_old_aead_init;
 	inst->alg.cra_exit = aead_geniv_exit;
 
 	inst->alg.cra_ctxsize = inst->alg.cra_aead.ivsize;
+	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+
+	err = crypto_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
 out:
-	return inst;
+	return err;
+
+free_inst:
+	aead_geniv_free(aead);
+	goto out;
 }
 
-static struct crypto_instance *seqiv_alloc(struct rtattr **tb)
+static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+
+	if (IS_ERR(inst))
+		return PTR_ERR(inst);
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+
+	if (inst->alg.base.cra_aead.encrypt)
+		return seqiv_old_aead_create(tmpl, inst);
+
+	err = -EINVAL;
+	if (inst->alg.ivsize != sizeof(u64))
+		goto free_inst;
+
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	inst->alg.setkey = seqiv_aead_setkey;
+	inst->alg.setauthsize = seqiv_aead_setauthsize;
+	inst->alg.encrypt = seqiv_aead_encrypt_first;
+	inst->alg.decrypt = seqiv_aead_decrypt;
+
+	inst->alg.base.cra_init = seqiv_aead_init;
+	inst->alg.base.cra_exit = seqiv_aead_exit;
+
+	inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize;
+
+	if (alg->base.cra_aead.encrypt) {
+		inst->alg.encrypt = seqiv_aead_encrypt_compat_first;
+		inst->alg.decrypt = seqiv_aead_decrypt_compat;
+
+		inst->alg.base.cra_init = seqniv_aead_init;
+		inst->alg.base.cra_exit = seqiv_aead_exit;
+	}
+
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
+
+out:
+	return err;
+
+free_inst:
+	aead_geniv_free(inst);
+	goto out;
+}
+
+static int seqiv_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct crypto_attr_type *algt;
-	struct crypto_instance *inst;
 	int err;
 
 	algt = crypto_get_attr_type(tb);
 	if (IS_ERR(algt))
-		return ERR_CAST(algt);
+		return PTR_ERR(algt);
 
 	err = crypto_get_default_rng();
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK)
-		inst = seqiv_ablkcipher_alloc(tb);
+		err = seqiv_ablkcipher_create(tmpl, tb);
 	else
-		inst = seqiv_aead_alloc(tb);
+		err = seqiv_aead_create(tmpl, tb);
+
+	if (err)
+		crypto_put_default_rng();
 
+	return err;
+}
+
+static int seqniv_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct aead_instance *inst;
+	struct crypto_aead_spawn *spawn;
+	struct aead_alg *alg;
+	int err;
+
+	err = crypto_get_default_rng();
+	if (err)
+		return err;
+
+	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
+	err = PTR_ERR(inst);
 	if (IS_ERR(inst))
 		goto put_rng;
 
-	inst->alg.cra_alignmask |= __alignof__(u32) - 1;
-	inst->alg.cra_ctxsize += sizeof(struct seqiv_ctx);
+	err = -EINVAL;
+	if (inst->alg.ivsize != sizeof(u64))
+		goto free_inst;
+
+	spawn = aead_instance_ctx(inst);
+	alg = crypto_spawn_aead_alg(spawn);
+
+	inst->alg.setkey = seqiv_aead_setkey;
+	inst->alg.setauthsize = seqiv_aead_setauthsize;
+	inst->alg.encrypt = seqiv_aead_encrypt_compat_first;
+	inst->alg.decrypt = seqiv_aead_decrypt_compat;
+
+	inst->alg.base.cra_init = seqniv_aead_init;
+	inst->alg.base.cra_exit = seqiv_aead_exit;
+
+	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
+	inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
+	inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize;
+
+	err = aead_register_instance(tmpl, inst);
+	if (err)
+		goto free_inst;
 
 out:
-	return inst;
+	return err;
 
+free_inst:
+	aead_geniv_free(inst);
 put_rng:
 	crypto_put_default_rng();
 	goto out;
@@ -348,20 +875,42 @@ static void seqiv_free(struct crypto_instance *inst)
 	if ((inst->alg.cra_flags ^ CRYPTO_ALG_TYPE_AEAD) & CRYPTO_ALG_TYPE_MASK)
 		skcipher_geniv_free(inst);
 	else
-		aead_geniv_free(inst);
+		aead_geniv_free(aead_instance(inst));
 	crypto_put_default_rng();
 }
 
 static struct crypto_template seqiv_tmpl = {
 	.name = "seqiv",
-	.alloc = seqiv_alloc,
+	.create = seqiv_create,
+	.free = seqiv_free,
+	.module = THIS_MODULE,
+};
+
+static struct crypto_template seqniv_tmpl = {
+	.name = "seqniv",
+	.create = seqniv_create,
 	.free = seqiv_free,
 	.module = THIS_MODULE,
 };
 
 static int __init seqiv_module_init(void)
 {
-	return crypto_register_template(&seqiv_tmpl);
+	int err;
+
+	err = crypto_register_template(&seqiv_tmpl);
+	if (err)
+		goto out;
+
+	err = crypto_register_template(&seqniv_tmpl);
+	if (err)
+		goto out_undo_niv;
+
+out:
+	return err;
+
+out_undo_niv:
+	crypto_unregister_template(&seqiv_tmpl);
+	goto out;
 }
 
 static void __exit seqiv_module_exit(void)
@@ -375,3 +924,4 @@ module_exit(seqiv_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Sequence Number IV Generator");
 MODULE_ALIAS_CRYPTO("seqiv");
+MODULE_ALIAS_CRYPTO("seqniv");
diff --git a/crypto/shash.c b/crypto/shash.c
index 47c7139..ecb1e3d 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -520,11 +520,6 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static unsigned int crypto_shash_extsize(struct crypto_alg *alg)
-{
-	return alg->cra_ctxsize;
-}
-
 #ifdef CONFIG_NET
 static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
@@ -564,7 +559,7 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg)
 
 static const struct crypto_type crypto_shash_type = {
 	.ctxsize = crypto_shash_ctxsize,
-	.extsize = crypto_shash_extsize,
+	.extsize = crypto_alg_extsize,
 	.init = crypto_init_shash_ops,
 	.init_tfm = crypto_shash_init_tfm,
 #ifdef CONFIG_PROC_FS
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 1a28001..2bff613 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -22,8 +22,10 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
@@ -34,7 +36,6 @@
 #include <linux/timex.h>
 #include <linux/interrupt.h>
 #include "tcrypt.h"
-#include "internal.h"
 
 /*
  * Need slab memory for testing (size in number of pages).
@@ -808,7 +809,7 @@ static int test_ahash_jiffies(struct ahash_request *req, int blen,
 
 	for (start = jiffies, end = start + secs * HZ, bcount = 0;
 	     time_before(jiffies, end); bcount++) {
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			return ret;
 		for (pcount = 0; pcount < blen; pcount += plen) {
@@ -877,7 +878,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
 
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
@@ -896,7 +897,7 @@ static int test_ahash_cycles(struct ahash_request *req, int blen,
 
 		start = get_cycles();
 
-		ret = crypto_ahash_init(req);
+		ret = do_one_ahash_op(req, crypto_ahash_init(req));
 		if (ret)
 			goto out;
 		for (pcount = 0; pcount < blen; pcount += plen) {
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f9bce3d..277b3ac 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -20,8 +20,10 @@
  *
  */
 
+#include <crypto/aead.h>
 #include <crypto/hash.h>
 #include <linux/err.h>
+#include <linux/fips.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
@@ -2318,6 +2320,15 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.alg = "compress_null",
 		.test = alg_test_null,
 	}, {
+		.alg = "crc32",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = crc32_tv_template,
+				.count = CRC32_TEST_VECTORS
+			}
+		}
+	}, {
 		.alg = "crc32c",
 		.test = alg_test_crc32c,
 		.fips_allowed = 1,
@@ -3095,6 +3106,10 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "jitterentropy_rng",
+		.fips_allowed = 1,
+		.test = alg_test_null,
+	}, {
 		.alg = "lrw(aes)",
 		.test = alg_test_skcipher,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 62e2485..6003143 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -54,7 +54,7 @@ struct cipher_testvec {
 	unsigned short tap[MAX_TAP];
 	int np;
 	unsigned char also_non_np;
-	unsigned char fail;
+	bool fail;
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
 	unsigned short ilen;
@@ -71,7 +71,7 @@ struct aead_testvec {
 	unsigned char atap[MAX_TAP];
 	int np;
 	int anp;
-	unsigned char fail;
+	bool fail;
 	unsigned char novrfy;	/* ccm dec verification failure expected */
 	unsigned char wk; /* weak key flag */
 	unsigned char klen;
@@ -1822,7 +1822,7 @@ static struct hash_testvec tgr128_tv_template[] = {
 	},
 };
 
-#define GHASH_TEST_VECTORS 5
+#define GHASH_TEST_VECTORS 6
 
 static struct hash_testvec ghash_tv_template[] =
 {
@@ -1875,6 +1875,63 @@ static struct hash_testvec ghash_tv_template[] =
 		.psize	= 20,
 		.digest	= "\xf8\x94\x87\x2a\x4b\x63\x99\x28"
 			  "\x23\xf7\x93\xf7\x19\xf5\x96\xd9",
+	}, {
+		.key	= "\x0a\x1b\x2c\x3d\x4e\x5f\x64\x71"
+			"\x82\x93\xa4\xb5\xc6\xd7\xe8\xf9",
+		.ksize	= 16,
+		.plaintext = "\x56\x6f\x72\x20\x6c\x61\x75\x74"
+			"\x65\x72\x20\x4c\x61\x75\x73\x63"
+			"\x68\x65\x6e\x20\x75\x6e\x64\x20"
+			"\x53\x74\x61\x75\x6e\x65\x6e\x20"
+			"\x73\x65\x69\x20\x73\x74\x69\x6c"
+			"\x6c\x2c\x0a\x64\x75\x20\x6d\x65"
+			"\x69\x6e\x20\x74\x69\x65\x66\x74"
+			"\x69\x65\x66\x65\x73\x20\x4c\x65"
+			"\x62\x65\x6e\x3b\x0a\x64\x61\x73"
+			"\x73\x20\x64\x75\x20\x77\x65\x69"
+			"\xc3\x9f\x74\x20\x77\x61\x73\x20"
+			"\x64\x65\x72\x20\x57\x69\x6e\x64"
+			"\x20\x64\x69\x72\x20\x77\x69\x6c"
+			"\x6c\x2c\x0a\x65\x68\x20\x6e\x6f"
+			"\x63\x68\x20\x64\x69\x65\x20\x42"
+			"\x69\x72\x6b\x65\x6e\x20\x62\x65"
+			"\x62\x65\x6e\x2e\x0a\x0a\x55\x6e"
+			"\x64\x20\x77\x65\x6e\x6e\x20\x64"
+			"\x69\x72\x20\x65\x69\x6e\x6d\x61"
+			"\x6c\x20\x64\x61\x73\x20\x53\x63"
+			"\x68\x77\x65\x69\x67\x65\x6e\x20"
+			"\x73\x70\x72\x61\x63\x68\x2c\x0a"
+			"\x6c\x61\x73\x73\x20\x64\x65\x69"
+			"\x6e\x65\x20\x53\x69\x6e\x6e\x65"
+			"\x20\x62\x65\x73\x69\x65\x67\x65"
+			"\x6e\x2e\x0a\x4a\x65\x64\x65\x6d"
+			"\x20\x48\x61\x75\x63\x68\x65\x20"
+			"\x67\x69\x62\x74\x20\x64\x69\x63"
+			"\x68\x2c\x20\x67\x69\x62\x20\x6e"
+			"\x61\x63\x68\x2c\x0a\x65\x72\x20"
+			"\x77\x69\x72\x64\x20\x64\x69\x63"
+			"\x68\x20\x6c\x69\x65\x62\x65\x6e"
+			"\x20\x75\x6e\x64\x20\x77\x69\x65"
+			"\x67\x65\x6e\x2e\x0a\x0a\x55\x6e"
+			"\x64\x20\x64\x61\x6e\x6e\x20\x6d"
+			"\x65\x69\x6e\x65\x20\x53\x65\x65"
+			"\x6c\x65\x20\x73\x65\x69\x74\x20"
+			"\x77\x65\x69\x74\x2c\x20\x73\x65"
+			"\x69\x20\x77\x65\x69\x74\x2c\x0a"
+			"\x64\x61\x73\x73\x20\x64\x69\x72"
+			"\x20\x64\x61\x73\x20\x4c\x65\x62"
+			"\x65\x6e\x20\x67\x65\x6c\x69\x6e"
+			"\x67\x65\x2c\x0a\x62\x72\x65\x69"
+			"\x74\x65\x20\x64\x69\x63\x68\x20"
+			"\x77\x69\x65\x20\x65\x69\x6e\x20"
+			"\x46\x65\x69\x65\x72\x6b\x6c\x65"
+			"\x69\x64\x0a\xc3\xbc\x62\x65\x72"
+			"\x20\x64\x69\x65\x20\x73\x69\x6e"
+			"\x6e\x65\x6e\x64\x65\x6e\x20\x44"
+			"\x69\x6e\x67\x65\x2e\x2e\x2e\x0a",
+		.psize	= 400,
+		.digest = "\xad\xb1\xc1\xe9\x56\x70\x31\x1d"
+			"\xbb\x5b\xdf\x5e\x70\x72\x1a\x57",
 	},
 };
 
@@ -3018,7 +3075,7 @@ static struct cipher_testvec des_enc_tv_template[] = {
 			  "\xb4\x99\x26\xf7\x1f\xe1\xd4\x90",
 		.rlen	= 24,
 	}, { /* Weak key */
-		.fail	= 1,
+		.fail	= true,
 		.wk	= 1,
 		.key	= "\x01\x01\x01\x01\x01\x01\x01\x01",
 		.klen	= 8,
@@ -28591,7 +28648,7 @@ struct comp_testvec {
 };
 
 struct pcomp_testvec {
-	void *params;
+	const void *params;
 	unsigned int paramsize;
 	int inlen, outlen;
 	char input[COMP_BUF_SIZE];
@@ -28946,6 +29003,440 @@ static struct hash_testvec michael_mic_tv_template[] = {
 };
 
 /*
+ * CRC32 test vectors
+ */
+#define CRC32_TEST_VECTORS 14
+
+static struct hash_testvec crc32_tv_template[] = {
+	{
+		.key = "\x87\xa9\xcb\xed",
+		.ksize = 4,
+		.psize = 0,
+		.digest = "\x87\xa9\xcb\xed",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28",
+		.psize = 40,
+		.digest = "\x3a\xdf\x4b\xb0",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50",
+		.psize = 40,
+		.digest = "\xa9\x7a\x7f\x7b",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78",
+		.psize = 40,
+		.digest = "\xba\xd3\xf8\x1c",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0",
+		.psize = 40,
+		.digest = "\xa8\xa9\xc2\x02",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8",
+		.psize = 40,
+		.digest = "\x27\xf0\x57\xe2",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 40,
+		.digest = "\x49\x78\x10\x08",
+	},
+	{
+		.key = "\x80\xea\xd3\xf1",
+		.ksize = 4,
+		.plaintext = "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50",
+		.psize = 40,
+		.digest = "\x9a\xb1\xdc\xf0",
+	},
+	{
+		.key = "\xf3\x4a\x1d\x5d",
+		.ksize = 4,
+		.plaintext = "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78",
+		.psize = 40,
+		.digest = "\xb4\x97\xcc\xd4",
+	},
+	{
+		.key = "\x2e\x80\x04\x59",
+		.ksize = 4,
+		.plaintext = "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0",
+		.psize = 40,
+		.digest = "\x67\x9b\xfa\x79",
+	},
+	{
+		.key = "\xa6\xcc\x19\x85",
+		.ksize = 4,
+		.plaintext = "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8",
+		.psize = 40,
+		.digest = "\x24\xb5\x16\xef",
+	},
+	{
+		.key = "\x41\xfc\xfe\x2d",
+		.ksize = 4,
+		.plaintext = "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 40,
+		.digest = "\x15\x94\x80\x39",
+	},
+	{
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext = "\x01\x02\x03\x04\x05\x06\x07\x08"
+			     "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+			     "\x11\x12\x13\x14\x15\x16\x17\x18"
+			     "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20"
+			     "\x21\x22\x23\x24\x25\x26\x27\x28"
+			     "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"
+			     "\x31\x32\x33\x34\x35\x36\x37\x38"
+			     "\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40"
+			     "\x41\x42\x43\x44\x45\x46\x47\x48"
+			     "\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50"
+			     "\x51\x52\x53\x54\x55\x56\x57\x58"
+			     "\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60"
+			     "\x61\x62\x63\x64\x65\x66\x67\x68"
+			     "\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70"
+			     "\x71\x72\x73\x74\x75\x76\x77\x78"
+			     "\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80"
+			     "\x81\x82\x83\x84\x85\x86\x87\x88"
+			     "\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90"
+			     "\x91\x92\x93\x94\x95\x96\x97\x98"
+			     "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0"
+			     "\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8"
+			     "\xa9\xaa\xab\xac\xad\xae\xaf\xb0"
+			     "\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8"
+			     "\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0"
+			     "\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8"
+			     "\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0"
+			     "\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8"
+			     "\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0"
+			     "\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
+			     "\xe9\xea\xeb\xec\xed\xee\xef\xf0",
+		.psize = 240,
+		.digest = "\x6c\xc6\x56\xde",
+		.np = 2,
+		.tap = { 31, 209 }
+	}, {
+		.key = "\xff\xff\xff\xff",
+		.ksize = 4,
+		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
+				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
+				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
+				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
+				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
+				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
+				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
+				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
+				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
+				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
+				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
+				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
+				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
+				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
+				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
+				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
+				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
+				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
+				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
+				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
+				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
+				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
+				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
+				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
+				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
+				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
+				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
+				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
+				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
+				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
+				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
+				"\x47\xde\x75\x0c\x80\x17\xae\x22"
+				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
+				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
+				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
+				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
+				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
+				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
+				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
+				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
+				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
+				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
+				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
+				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
+				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
+				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
+				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
+				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
+				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
+				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
+				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
+				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
+				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
+				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
+				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
+				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
+				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
+				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
+				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
+				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
+				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
+				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
+				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
+				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
+				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
+				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
+				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
+				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
+				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
+				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
+				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
+				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
+				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
+				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
+				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
+				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
+				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
+				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
+				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
+				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
+				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
+				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
+				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
+				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
+				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
+				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
+				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
+				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
+				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
+				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
+				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
+				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
+				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
+				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
+				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
+				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
+				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
+				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
+				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
+				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
+				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
+				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
+				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
+				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
+				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
+				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
+				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
+				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
+				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
+				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
+				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
+				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
+				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
+				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
+				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
+				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
+				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
+				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
+				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
+				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
+				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
+				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
+				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
+				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
+				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
+				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
+				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
+				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
+				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
+				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
+				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
+				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
+				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
+				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
+				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
+				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
+				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
+				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
+				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
+				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
+				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
+				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
+				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
+				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
+				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
+				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
+				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
+				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
+				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
+				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
+				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
+				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
+				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
+				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
+				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
+				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
+				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
+				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
+				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
+				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
+				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
+				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
+				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
+				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
+				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
+				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
+				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
+				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
+				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
+				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
+				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
+				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
+				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
+				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
+				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
+				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
+				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
+				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
+				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
+				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
+				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
+				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
+				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
+				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
+				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
+				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
+				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
+				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
+				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
+				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
+				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
+				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
+				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
+				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
+				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
+				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
+				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
+				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
+				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
+				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
+				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
+				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
+				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
+				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
+				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
+				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
+				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
+				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
+				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
+				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
+				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
+				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
+				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
+				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
+				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
+				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
+				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
+				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
+				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
+				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
+				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
+				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
+				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
+				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
+				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
+				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
+				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
+				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
+				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
+				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
+				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
+				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
+				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
+				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
+				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
+				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
+				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
+				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
+				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
+				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
+				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
+				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
+				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
+				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
+				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
+				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
+				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
+				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
+				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
+				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
+				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
+				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
+				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
+				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
+				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
+				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
+		.psize = 2048,
+		.digest = "\xfb\x3a\x7a\xda",
+	}
+};
+
+/*
  * CRC32C test vectors
  */
 #define CRC32C_TEST_VECTORS 15
diff --git a/crypto/zlib.c b/crypto/zlib.c
index 0eefa9d..d51a30a 100644
--- a/crypto/zlib.c
+++ b/crypto/zlib.c
@@ -78,7 +78,7 @@ static void zlib_exit(struct crypto_tfm *tfm)
 }
 
 
-static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
+static int zlib_compress_setup(struct crypto_pcomp *tfm, const void *params,
 			       unsigned int len)
 {
 	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
@@ -209,7 +209,7 @@ static int zlib_compress_final(struct crypto_pcomp *tfm,
 }
 
 
-static int zlib_decompress_setup(struct crypto_pcomp *tfm, void *params,
+static int zlib_decompress_setup(struct crypto_pcomp *tfm, const void *params,
 				 unsigned int len)
 {
 	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 9cd6968..159d070 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -660,7 +660,7 @@ retry:
 		r->entropy_total = 0;
 		if (r == &nonblocking_pool) {
 			prandom_reseed_late();
-			wake_up_interruptible(&urandom_init_wait);
+			wake_up_all(&urandom_init_wait);
 			pr_notice("random: %s pool is initialized\n", r->name);
 		}
 	}
@@ -1245,6 +1245,18 @@ void get_random_bytes(void *buf, int nbytes)
 EXPORT_SYMBOL(get_random_bytes);
 
 /*
+ * Equivalent function to get_random_bytes with the difference that this
+ * function blocks the request until the nonblocking_pool is initialized.
+ */
+void get_blocking_random_bytes(void *buf, int nbytes)
+{
+	if (unlikely(nonblocking_pool.initialized == 0))
+		wait_event(urandom_init_wait, nonblocking_pool.initialized);
+	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
+}
+EXPORT_SYMBOL(get_blocking_random_bytes);
+
+/*
  * This function will use the architecture-specific hardware random
  * number generator if it is available.  The arch-specific hw RNG will
  * almost certainly be faster than what we can do in software, but it
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 033c0c8..6b0579f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS
 	  To compile this driver as a module, choose M here: the module
 	  will be called talitos.
 
+config CRYPTO_DEV_TALITOS1
+	bool "SEC1 (SEC 1.0 and SEC Lite 1.2)"
+	depends on CRYPTO_DEV_TALITOS
+	depends on PPC_8xx || PPC_82xx
+	default y
+	help
+	  Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0
+	  found on MPC82xx or the Freescale Security Engine (SEC Lite)
+	  version 1.2 found on MPC8xx
+
+config CRYPTO_DEV_TALITOS2
+	bool "SEC2+ (SEC version 2.0 or upper)"
+	depends on CRYPTO_DEV_TALITOS
+	default y if !PPC_8xx
+	help
+	  Say 'Y' here to use the Freescale Security Engine (SEC)
+	  version 2 and following as found on MPC83xx, MPC85xx, etc ...
+
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
@@ -312,11 +330,13 @@ config CRYPTO_DEV_S5P
 	  algorithms execution.
 
 config CRYPTO_DEV_NX
-	bool "Support for IBM Power7+ in-Nest cryptographic acceleration"
-	depends on PPC64 && IBMVIO && !CPU_LITTLE_ENDIAN
-	default n
+	bool "Support for IBM PowerPC Nest (NX) cryptographic acceleration"
+	depends on PPC64
 	help
-	  Support for Power7+ in-Nest cryptographic acceleration.
+	  This enables support for the NX hardware cryptographic accelerator
+	  coprocessor that is in IBM PowerPC P7+ or later processors.  This
+	  does not actually enable any drivers, it only allows you to select
+	  which acceleration type (encryption and/or compression) to enable.
 
 if CRYPTO_DEV_NX
 	source "drivers/crypto/nx/Kconfig"
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 29071a1..3d850ab 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -258,7 +258,7 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx,
 
 static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -383,7 +383,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
@@ -449,7 +449,7 @@ static int aead_null_set_sh_desc(struct crypto_aead *aead)
 
 static int aead_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct crypto_tfm *ctfm = crypto_aead_tfm(aead);
 	const char *alg_name = crypto_tfm_alg_name(ctfm);
@@ -510,7 +510,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
 
 	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
+	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ);
@@ -518,7 +518,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	/* read assoc before reading payload */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
 			     KEY_VLF);
-	aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off);
+	aead_append_ld_iv(desc, ivsize, ctx1_iv_off);
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
@@ -577,7 +577,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ);
@@ -586,7 +586,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
 			     KEY_VLF);
 
-	aead_append_ld_iv(desc, tfm->ivsize, ctx1_iv_off);
+	aead_append_ld_iv(desc, ivsize, ctx1_iv_off);
 
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
@@ -645,20 +645,20 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	/* Generate IV */
 	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
 		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
 	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
 	append_move(desc, MOVE_WAITCOMP |
 		    MOVE_SRC_INFIFO | MOVE_DEST_CLASS1CTX |
 		    (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
 
 	/* Copy IV to class 1 context */
 	append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_OUTFIFO |
 		    (ctx1_iv_off << MOVE_OFFSET_SHIFT) |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 
 	/* Return to encryption */
 	append_operation(desc, ctx->class2_alg_type |
@@ -676,10 +676,10 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 	/* Copy iv from outfifo to class 2 fifo */
 	moveiv = NFIFOENTRY_STYPE_OFIFO | NFIFOENTRY_DEST_CLASS2 |
-		 NFIFOENTRY_DTYPE_MSG | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		 NFIFOENTRY_DTYPE_MSG | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, moveiv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
-	append_load_imm_u32(desc, tfm->ivsize, LDST_CLASS_2_CCB |
+	append_load_imm_u32(desc, ivsize, LDST_CLASS_2_CCB |
 			    LDST_SRCDST_WORD_DATASZ_REG | LDST_IMM);
 
 	/* Load Counter into CONTEXT1 reg */
@@ -698,7 +698,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	append_math_add(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
 	/* Not need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize,
+	append_seq_fifo_load(desc, ivsize,
 			     FIFOLD_CLASS_SKIP);
 
 	/* Will read cryptlen */
@@ -738,7 +738,7 @@ static int aead_setauthsize(struct crypto_aead *authenc,
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -781,7 +781,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
 
 	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
+	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG1, REG2, REG3, CAAM_CMD_SZ);
@@ -791,7 +791,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	zero_payload_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
 					    JUMP_COND_MATH_Z);
 	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
 
 	/* if assoclen is ZERO, skip reading the assoc data */
@@ -824,7 +824,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 	zero_assoc_jump_cmd2 = append_jump(desc, JUMP_TEST_ALL |
 					   JUMP_COND_MATH_Z);
 	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
 
 	/* read assoc data */
@@ -836,7 +836,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 
 	/* read IV - is the only input data */
 	set_jump_tgt_here(desc, zero_assoc_jump_cmd2);
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 |
 			     FIFOLD_TYPE_LAST1);
 
@@ -888,14 +888,14 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - icvsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 	append_math_sub(desc, REG1, REG3, REG2, CAAM_CMD_SZ);
 
 	/* read IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
 
 	/* jump to zero-payload command if cryptlen is zero */
@@ -968,7 +968,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 
 static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -1012,7 +1012,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* assoclen + cryptlen = seqinlen - ivsize */
-	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize);
+	append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ);
@@ -1021,7 +1021,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen),
 				4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV);
 	/* Read AES-GCM-ESP IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
 
 	/* Read assoc data */
@@ -1085,7 +1085,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 
 	/* assoclen + cryptlen = seqinlen - ivsize - icvsize */
 	append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM,
-				ctx->authsize + tfm->ivsize);
+				ctx->authsize + ivsize);
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
@@ -1098,7 +1098,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	append_fifo_load_as_imm(desc, (void *)(ctx->key + ctx->enckeylen),
 				4, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_IV);
 	/* Read AES-GCM-ESP IV */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_CLASS1 |
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_CLASS1 |
 			     FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1);
 
 	/* Read assoc data */
@@ -1161,17 +1161,17 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	/* Generate IV */
 	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
 		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
 	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
 	move_cmd = append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_DESCBUF |
-			       (tfm->ivsize << MOVE_LEN_SHIFT));
+			       (ivsize << MOVE_LEN_SHIFT));
 	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
 
 	/* Copy generated IV to OFIFO */
 	write_iv_cmd = append_move(desc, MOVE_SRC_DESCBUF | MOVE_DEST_OUTFIFO |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
+				   (ivsize << MOVE_LEN_SHIFT));
 
 	/* Class 1 operation */
 	append_operation(desc, ctx->class1_alg_type |
@@ -1199,7 +1199,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 	/* End of blank commands */
 
 	/* No need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
@@ -1249,7 +1249,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc,
 
 static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
-	struct aead_tfm *tfm = &aead->base.crt_aead;
+	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool keys_fit_inline = false;
@@ -1291,7 +1291,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Load AES-GMAC ESP IV into Math1 register */
 	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 |
-		   LDST_CLASS_DECO | tfm->ivsize);
+		   LDST_CLASS_DECO | ivsize);
 
 	/* Wait the DMA transaction to finish */
 	append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM |
@@ -1299,11 +1299,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Overwrite blank immediate AES-GMAC ESP IV data */
 	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
+				   (ivsize << MOVE_LEN_SHIFT));
 
 	/* Overwrite blank immediate AAD data */
 	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
+				    (ivsize << MOVE_LEN_SHIFT));
 
 	/* cryptlen = seqoutlen - authsize */
 	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
@@ -1313,7 +1313,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Read Salt and AES-GMAC ESP IV */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
+		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize));
 	/* Append Salt */
 	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
 	set_move_tgt_here(desc, write_iv_cmd);
@@ -1344,7 +1344,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Authenticate AES-GMAC ESP IV  */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
+		   FIFOLD_TYPE_AAD | ivsize);
 	set_move_tgt_here(desc, write_aad_cmd);
 	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
 	append_cmd(desc, 0x00000000);
@@ -1407,7 +1407,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Load AES-GMAC ESP IV into Math1 register */
 	append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_WORD_DECO_MATH1 |
-		   LDST_CLASS_DECO | tfm->ivsize);
+		   LDST_CLASS_DECO | ivsize);
 
 	/* Wait the DMA transaction to finish */
 	append_jump(desc, JUMP_TEST_ALL | JUMP_COND_CALM |
@@ -1418,11 +1418,11 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Overwrite blank immediate AES-GMAC ESP IV data */
 	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
+				   (ivsize << MOVE_LEN_SHIFT));
 
 	/* Overwrite blank immediate AAD data */
 	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
+				    (ivsize << MOVE_LEN_SHIFT));
 
 	/* assoclen = (assoclen + cryptlen) - cryptlen */
 	append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ);
@@ -1440,7 +1440,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Read Salt and AES-GMAC ESP IV */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
+		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize));
 	/* Append Salt */
 	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
 	set_move_tgt_here(desc, write_iv_cmd);
@@ -1461,7 +1461,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Authenticate AES-GMAC ESP IV  */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
+		   FIFOLD_TYPE_AAD | ivsize);
 	set_move_tgt_here(desc, write_aad_cmd);
 	/* Blank commands. Will be overwritten by AES-GMAC ESP IV. */
 	append_cmd(desc, 0x00000000);
@@ -1527,26 +1527,26 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	/* Generate IV */
 	geniv = NFIFOENTRY_STYPE_PAD | NFIFOENTRY_DEST_DECO |
 		NFIFOENTRY_DTYPE_MSG | NFIFOENTRY_LC1 |
-		NFIFOENTRY_PTYPE_RND | (tfm->ivsize << NFIFOENTRY_DLEN_SHIFT);
+		NFIFOENTRY_PTYPE_RND | (ivsize << NFIFOENTRY_DLEN_SHIFT);
 	append_load_imm_u32(desc, geniv, LDST_CLASS_IND_CCB |
 			    LDST_SRCDST_WORD_INFO_FIFO | LDST_IMM);
 	append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO);
 	/* Move generated IV to Math1 register */
 	append_move(desc, MOVE_SRC_INFIFO | MOVE_DEST_MATH1 |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 	append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO);
 
 	/* Overwrite blank immediate AES-GMAC IV data */
 	write_iv_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				   (tfm->ivsize << MOVE_LEN_SHIFT));
+				   (ivsize << MOVE_LEN_SHIFT));
 
 	/* Overwrite blank immediate AAD data */
 	write_aad_cmd = append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_DESCBUF |
-				    (tfm->ivsize << MOVE_LEN_SHIFT));
+				    (ivsize << MOVE_LEN_SHIFT));
 
 	/* Copy generated IV to OFIFO */
 	append_move(desc, MOVE_SRC_MATH1 | MOVE_DEST_OUTFIFO |
-		    (tfm->ivsize << MOVE_LEN_SHIFT));
+		    (ivsize << MOVE_LEN_SHIFT));
 
 	/* Class 1 operation */
 	append_operation(desc, ctx->class1_alg_type |
@@ -1573,7 +1573,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Read Salt and AES-GMAC generated IV */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + tfm->ivsize));
+		   FIFOLD_TYPE_IV | FIFOLD_TYPE_FLUSH1 | (4 + ivsize));
 	/* Append Salt */
 	append_data(desc, (void *)(ctx->key + ctx->enckeylen), 4);
 	set_move_tgt_here(desc, write_iv_cmd);
@@ -1583,7 +1583,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 	/* End of blank commands */
 
 	/* No need to reload iv */
-	append_seq_fifo_load(desc, tfm->ivsize, FIFOLD_CLASS_SKIP);
+	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
 
 	/* Read assoc data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
@@ -1594,7 +1594,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 
 	/* Authenticate AES-GMAC IV  */
 	append_cmd(desc, CMD_FIFO_LOAD | FIFOLD_CLASS_CLASS1 | IMMEDIATE |
-		   FIFOLD_TYPE_AAD | tfm->ivsize);
+		   FIFOLD_TYPE_AAD | ivsize);
 	set_move_tgt_here(desc, write_aad_cmd);
 	/* Blank commands. Will be overwritten by AES-GMAC IV. */
 	append_cmd(desc, 0x00000000);
@@ -3379,11 +3379,7 @@ struct caam_alg_template {
 	u32 type;
 	union {
 		struct ablkcipher_alg ablkcipher;
-		struct aead_alg aead;
-		struct blkcipher_alg blkcipher;
-		struct cipher_alg cipher;
-		struct compress_alg compress;
-		struct rng_alg rng;
+		struct old_aead_alg aead;
 	} template_u;
 	u32 class1_alg_type;
 	u32 class2_alg_type;
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
index acd7743..f57f395 100644
--- a/drivers/crypto/caam/compat.h
+++ b/drivers/crypto/caam/compat.h
@@ -32,7 +32,7 @@
 #include <crypto/des.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/skcipher.h>
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 71f2e3c..542453c 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -53,7 +53,6 @@ struct ccp_dm_workarea {
 struct ccp_sg_workarea {
 	struct scatterlist *sg;
 	unsigned int nents;
-	unsigned int length;
 
 	struct scatterlist *dma_sg;
 	struct device *dma_dev;
@@ -497,7 +496,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
 		return 0;
 
 	wa->nents = sg_nents(sg);
-	wa->length = sg->length;
 	wa->bytes_left = len;
 	wa->sg_used = 0;
 
diff --git a/drivers/crypto/ccp/ccp-platform.c b/drivers/crypto/ccp/ccp-platform.c
index b1c20b2..c0aa5c5 100644
--- a/drivers/crypto/ccp/ccp-platform.c
+++ b/drivers/crypto/ccp/ccp-platform.c
@@ -174,8 +174,6 @@ static int ccp_platform_probe(struct platform_device *pdev)
 	}
 	ccp->io_regs = ccp->io_map;
 
-	if (!dev->dma_mask)
-		dev->dma_mask = &dev->coherent_dma_mask;
 	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
 	if (ret) {
 		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 48f4535..7ba495f 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -25,7 +25,7 @@
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/scatterwalk.h>
 
@@ -575,7 +575,8 @@ static int init_tfm_ablk(struct crypto_tfm *tfm)
 
 static int init_tfm_aead(struct crypto_tfm *tfm)
 {
-	tfm->crt_aead.reqsize = sizeof(struct aead_ctx);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct aead_ctx));
 	return init_tfm(tfm);
 }
 
@@ -1096,7 +1097,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
 	u32 *flags = &tfm->base.crt_flags;
-	unsigned digest_len = crypto_aead_alg(tfm)->maxauthsize;
+	unsigned digest_len = crypto_aead_maxauthsize(tfm);
 	int ret;
 
 	if (!ctx->enckey_len && !ctx->authkey_len)
@@ -1138,7 +1139,7 @@ out:
 
 static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 {
-	int max = crypto_aead_alg(tfm)->maxauthsize >> 2;
+	int max = crypto_aead_maxauthsize(tfm) >> 2;
 
 	if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3))
 		return -EINVAL;
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index f91f15d..eb645c2 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -595,7 +595,7 @@ static int queue_manag(void *data)
 	cpg->eng_st = ENGINE_IDLE;
 	do {
 		struct crypto_async_request *async_req = NULL;
-		struct crypto_async_request *backlog;
+		struct crypto_async_request *backlog = NULL;
 
 		__set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1041,23 +1041,23 @@ static int mv_probe(struct platform_device *pdev)
 
 	spin_lock_init(&cp->lock);
 	crypto_init_queue(&cp->queue, 50);
-	cp->reg = ioremap(res->start, resource_size(res));
-	if (!cp->reg) {
-		ret = -ENOMEM;
+	cp->reg = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cp->reg)) {
+		ret = PTR_ERR(cp->reg);
 		goto err;
 	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sram");
 	if (!res) {
 		ret = -ENXIO;
-		goto err_unmap_reg;
+		goto err;
 	}
 	cp->sram_size = resource_size(res);
 	cp->max_req_size = cp->sram_size - SRAM_CFG_SPACE;
 	cp->sram = ioremap(res->start, cp->sram_size);
 	if (!cp->sram) {
 		ret = -ENOMEM;
-		goto err_unmap_reg;
+		goto err;
 	}
 
 	if (pdev->dev.of_node)
@@ -1136,8 +1136,6 @@ err_thread:
 	kthread_stop(cp->queue_th);
 err_unmap_sram:
 	iounmap(cp->sram);
-err_unmap_reg:
-	iounmap(cp->reg);
 err:
 	kfree(cp);
 	cpg = NULL;
@@ -1158,7 +1156,6 @@ static int mv_remove(struct platform_device *pdev)
 	free_irq(cp->irq, cp);
 	memset(cp->sram, 0, cp->sram_size);
 	iounmap(cp->sram);
-	iounmap(cp->reg);
 
 	if (!IS_ERR(cp->clk)) {
 		clk_disable_unprepare(cp->clk);
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index afd136b..722392f 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1281,10 +1281,10 @@ static const char md5_zero[MD5_DIGEST_SIZE] = {
 	0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e,
 };
 static const u32 md5_init[MD5_HASH_WORDS] = {
-	cpu_to_le32(0x67452301),
-	cpu_to_le32(0xefcdab89),
-	cpu_to_le32(0x98badcfe),
-	cpu_to_le32(0x10325476),
+	cpu_to_le32(MD5_H0),
+	cpu_to_le32(MD5_H1),
+	cpu_to_le32(MD5_H2),
+	cpu_to_le32(MD5_H3),
 };
 static const char sha1_zero[SHA1_DIGEST_SIZE] = {
 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32,
diff --git a/drivers/crypto/nx/Kconfig b/drivers/crypto/nx/Kconfig
index f826166..3e621ad 100644
--- a/drivers/crypto/nx/Kconfig
+++ b/drivers/crypto/nx/Kconfig
@@ -1,7 +1,9 @@
+
 config CRYPTO_DEV_NX_ENCRYPT
-	tristate "Encryption acceleration support"
-	depends on PPC64 && IBMVIO
+	tristate "Encryption acceleration support on pSeries platform"
+	depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
 	default y
+	select CRYPTO_ALGAPI
 	select CRYPTO_AES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
@@ -12,15 +14,50 @@ config CRYPTO_DEV_NX_ENCRYPT
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
 	help
-	  Support for Power7+ in-Nest encryption acceleration. This
-	  module supports acceleration for AES and SHA2 algorithms. If you
-	  choose 'M' here, this module will be called nx_crypto.
+	  Support for PowerPC Nest (NX) encryption acceleration. This
+	  module supports acceleration for AES and SHA2 algorithms on
+	  the pSeries platform.  If you choose 'M' here, this module
+	  will be called nx_crypto.
 
 config CRYPTO_DEV_NX_COMPRESS
 	tristate "Compression acceleration support"
-	depends on PPC64 && IBMVIO
 	default y
 	help
-	  Support for Power7+ in-Nest compression acceleration. This
-	  module supports acceleration for AES and SHA2 algorithms. If you
-	  choose 'M' here, this module will be called nx_compress.
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  One of the platform drivers must be selected also.
+	  If you choose 'M' here, this module will be called nx_compress.
+
+if CRYPTO_DEV_NX_COMPRESS
+
+config CRYPTO_DEV_NX_COMPRESS_PSERIES
+	tristate "Compression acceleration support on pSeries platform"
+	depends on PPC_PSERIES && IBMVIO && !CPU_LITTLE_ENDIAN
+	default y
+	help
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  This supports NX hardware on the pSeries platform.
+	  If you choose 'M' here, this module will be called nx_compress_pseries.
+
+config CRYPTO_DEV_NX_COMPRESS_POWERNV
+	tristate "Compression acceleration support on PowerNV platform"
+	depends on PPC_POWERNV
+	default y
+	help
+	  Support for PowerPC Nest (NX) compression acceleration. This
+	  module supports acceleration for compressing memory with the 842
+	  algorithm.  This supports NX hardware on the PowerNV platform.
+	  If you choose 'M' here, this module will be called nx_compress_powernv.
+
+config CRYPTO_DEV_NX_COMPRESS_CRYPTO
+	tristate "Compression acceleration cryptographic interface"
+	select CRYPTO_ALGAPI
+	select 842_DECOMPRESS
+	default y
+	help
+	  Support for PowerPC Nest (NX) accelerators using the cryptographic
+	  API.  If you choose 'M' here, this module will be called
+	  nx_compress_crypto.
+
+endif
diff --git a/drivers/crypto/nx/Makefile b/drivers/crypto/nx/Makefile
index bb770ea..868b5e6 100644
--- a/drivers/crypto/nx/Makefile
+++ b/drivers/crypto/nx/Makefile
@@ -11,4 +11,10 @@ nx-crypto-objs := nx.o \
 		  nx-sha512.o
 
 obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o
 nx-compress-objs := nx-842.o
+nx-compress-pseries-objs := nx-842-pseries.o
+nx-compress-powernv-objs := nx-842-powernv.o
+nx-compress-crypto-objs := nx-842-crypto.o
diff --git a/drivers/crypto/nx/nx-842-crypto.c b/drivers/crypto/nx/nx-842-crypto.c
new file mode 100644
index 0000000..2ffa103
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-crypto.c
@@ -0,0 +1,579 @@
+/*
+ * Cryptographic API for the NX-842 hardware compression.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) IBM Corporation, 2011-2015
+ *
+ * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
+ *
+ * Rewrite: Dan Streetman <ddstreet@ieee.org>
+ *
+ * This is an interface to the NX-842 compression hardware in PowerPC
+ * processors.  Most of the complexity of this drvier is due to the fact that
+ * the NX-842 compression hardware requires the input and output data buffers
+ * to be specifically aligned, to be a specific multiple in length, and within
+ * specific minimum and maximum lengths.  Those restrictions, provided by the
+ * nx-842 driver via nx842_constraints, mean this driver must use bounce
+ * buffers and headers to correct misaligned in or out buffers, and to split
+ * input buffers that are too large.
+ *
+ * This driver will fall back to software decompression if the hardware
+ * decompression fails, so this driver's decompression should never fail as
+ * long as the provided compressed buffer is valid.  Any compressed buffer
+ * created by this driver will have a header (except ones where the input
+ * perfectly matches the constraints); so users of this driver cannot simply
+ * pass a compressed buffer created by this driver over to the 842 software
+ * decompression library.  Instead, users must use this driver to decompress;
+ * if the hardware fails or is unavailable, the compressed buffer will be
+ * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
+ * software decompression library.
+ *
+ * This does not fall back to software compression, however, since the caller
+ * of this function is specifically requesting hardware compression; if the
+ * hardware compression fails, the caller can fall back to software
+ * compression, and the raw 842 compressed buffer that the software compressor
+ * creates can be passed to this driver for hardware decompression; any
+ * buffer without our specific header magic is assumed to be a raw 842 buffer
+ * and passed directly to the hardware.  Note that the software compression
+ * library will produce a compressed buffer that is incompatible with the
+ * hardware decompressor if the original input buffer length is not a multiple
+ * of 8; if such a compressed buffer is passed to this driver for
+ * decompression, the hardware will reject it and this driver will then pass
+ * it over to the software library for decompression.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/nx842.h>
+#include <linux/sw842.h>
+#include <linux/ratelimit.h>
+
+/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
+ * template (see lib/842/842.h), so this magic number will never appear at
+ * the start of a raw 842 compressed buffer.  That is important, as any buffer
+ * passed to us without this magic is assumed to be a raw 842 compressed
+ * buffer, and passed directly to the hardware to decompress.
+ */
+#define NX842_CRYPTO_MAGIC	(0xf842)
+#define NX842_CRYPTO_GROUP_MAX	(0x20)
+#define NX842_CRYPTO_HEADER_SIZE(g)				\
+	(sizeof(struct nx842_crypto_header) +			\
+	 sizeof(struct nx842_crypto_header_group) * (g))
+#define NX842_CRYPTO_HEADER_MAX_SIZE				\
+	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
+
+/* bounce buffer size */
+#define BOUNCE_BUFFER_ORDER	(2)
+#define BOUNCE_BUFFER_SIZE					\
+	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
+
+/* try longer on comp because we can fallback to sw decomp if hw is busy */
+#define COMP_BUSY_TIMEOUT	(250) /* ms */
+#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
+
+struct nx842_crypto_header_group {
+	__be16 padding;			/* unused bytes at start of group */
+	__be32 compressed_length;	/* compressed bytes in group */
+	__be32 uncompressed_length;	/* bytes after decompression */
+} __packed;
+
+struct nx842_crypto_header {
+	__be16 magic;		/* NX842_CRYPTO_MAGIC */
+	__be16 ignore;		/* decompressed end bytes to ignore */
+	u8 groups;		/* total groups in this header */
+	struct nx842_crypto_header_group group[];
+} __packed;
+
+struct nx842_crypto_param {
+	u8 *in;
+	unsigned int iremain;
+	u8 *out;
+	unsigned int oremain;
+	unsigned int ototal;
+};
+
+static int update_param(struct nx842_crypto_param *p,
+			unsigned int slen, unsigned int dlen)
+{
+	if (p->iremain < slen)
+		return -EOVERFLOW;
+	if (p->oremain < dlen)
+		return -ENOSPC;
+
+	p->in += slen;
+	p->iremain -= slen;
+	p->out += dlen;
+	p->oremain -= dlen;
+	p->ototal += dlen;
+
+	return 0;
+}
+
+struct nx842_crypto_ctx {
+	u8 *wmem;
+	u8 *sbounce, *dbounce;
+
+	struct nx842_crypto_header header;
+	struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
+};
+
+static int nx842_crypto_init(struct crypto_tfm *tfm)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->wmem = kmalloc(NX842_MEM_COMPRESS, GFP_KERNEL);
+	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
+		kfree(ctx->wmem);
+		free_page((unsigned long)ctx->sbounce);
+		free_page((unsigned long)ctx->dbounce);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void nx842_crypto_exit(struct crypto_tfm *tfm)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	kfree(ctx->wmem);
+	free_page((unsigned long)ctx->sbounce);
+	free_page((unsigned long)ctx->dbounce);
+}
+
+static int read_constraints(struct nx842_constraints *c)
+{
+	int ret;
+
+	ret = nx842_constraints(c);
+	if (ret) {
+		pr_err_ratelimited("could not get nx842 constraints : %d\n",
+				   ret);
+		return ret;
+	}
+
+	/* limit maximum, to always have enough bounce buffer to decompress */
+	if (c->maximum > BOUNCE_BUFFER_SIZE) {
+		c->maximum = BOUNCE_BUFFER_SIZE;
+		pr_info_once("limiting nx842 maximum to %x\n", c->maximum);
+	}
+
+	return 0;
+}
+
+static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
+{
+	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
+
+	/* compress should have added space for header */
+	if (s > be16_to_cpu(hdr->group[0].padding)) {
+		pr_err("Internal error: no space for header\n");
+		return -EINVAL;
+	}
+
+	memcpy(buf, hdr, s);
+
+	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
+
+	return 0;
+}
+
+static int compress(struct nx842_crypto_ctx *ctx,
+		    struct nx842_crypto_param *p,
+		    struct nx842_crypto_header_group *g,
+		    struct nx842_constraints *c,
+		    u16 *ignore,
+		    unsigned int hdrsize)
+{
+	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
+	unsigned int adj_slen = slen;
+	u8 *src = p->in, *dst = p->out;
+	int ret, dskip = 0;
+	ktime_t timeout;
+
+	if (p->iremain == 0)
+		return -EOVERFLOW;
+
+	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
+		return -ENOSPC;
+
+	if (slen % c->multiple)
+		adj_slen = round_up(slen, c->multiple);
+	if (slen < c->minimum)
+		adj_slen = c->minimum;
+	if (slen > c->maximum)
+		adj_slen = slen = c->maximum;
+	if (adj_slen > slen || (u64)src % c->alignment) {
+		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
+		slen = min(slen, BOUNCE_BUFFER_SIZE);
+		if (adj_slen > slen)
+			memset(ctx->sbounce + slen, 0, adj_slen - slen);
+		memcpy(ctx->sbounce, src, slen);
+		src = ctx->sbounce;
+		slen = adj_slen;
+		pr_debug("using comp sbounce buffer, len %x\n", slen);
+	}
+
+	dst += hdrsize;
+	dlen -= hdrsize;
+
+	if ((u64)dst % c->alignment) {
+		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
+		dst += dskip;
+		dlen -= dskip;
+	}
+	if (dlen % c->multiple)
+		dlen = round_down(dlen, c->multiple);
+	if (dlen < c->minimum) {
+nospc:
+		dst = ctx->dbounce;
+		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
+		dlen = round_down(dlen, c->multiple);
+		dskip = 0;
+		pr_debug("using comp dbounce buffer, len %x\n", dlen);
+	}
+	if (dlen > c->maximum)
+		dlen = c->maximum;
+
+	tmplen = dlen;
+	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
+	do {
+		dlen = tmplen; /* reset dlen, if we're retrying */
+		ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem);
+		/* possibly we should reduce the slen here, instead of
+		 * retrying with the dbounce buffer?
+		 */
+		if (ret == -ENOSPC && dst != ctx->dbounce)
+			goto nospc;
+	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+	if (ret)
+		return ret;
+
+	dskip += hdrsize;
+
+	if (dst == ctx->dbounce)
+		memcpy(p->out + dskip, dst, dlen);
+
+	g->padding = cpu_to_be16(dskip);
+	g->compressed_length = cpu_to_be32(dlen);
+	g->uncompressed_length = cpu_to_be32(slen);
+
+	if (p->iremain < slen) {
+		*ignore = slen - p->iremain;
+		slen = p->iremain;
+	}
+
+	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
+		 slen, *ignore, dlen, dskip);
+
+	return update_param(p, slen, dskip + dlen);
+}
+
+static int nx842_crypto_compress(struct crypto_tfm *tfm,
+				 const u8 *src, unsigned int slen,
+				 u8 *dst, unsigned int *dlen)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr = &ctx->header;
+	struct nx842_crypto_param p;
+	struct nx842_constraints c;
+	unsigned int groups, hdrsize, h;
+	int ret, n;
+	bool add_header;
+	u16 ignore = 0;
+
+	p.in = (u8 *)src;
+	p.iremain = slen;
+	p.out = dst;
+	p.oremain = *dlen;
+	p.ototal = 0;
+
+	*dlen = 0;
+
+	ret = read_constraints(&c);
+	if (ret)
+		return ret;
+
+	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
+		       DIV_ROUND_UP(p.iremain, c.maximum));
+	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
+
+	/* skip adding header if the buffers meet all constraints */
+	add_header = (p.iremain % c.multiple	||
+		      p.iremain < c.minimum	||
+		      p.iremain > c.maximum	||
+		      (u64)p.in % c.alignment	||
+		      p.oremain % c.multiple	||
+		      p.oremain < c.minimum	||
+		      p.oremain > c.maximum	||
+		      (u64)p.out % c.alignment);
+
+	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
+	hdr->groups = 0;
+	hdr->ignore = 0;
+
+	while (p.iremain > 0) {
+		n = hdr->groups++;
+		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
+			return -ENOSPC;
+
+		/* header goes before first group */
+		h = !n && add_header ? hdrsize : 0;
+
+		if (ignore)
+			pr_warn("interal error, ignore is set %x\n", ignore);
+
+		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
+		if (ret)
+			return ret;
+	}
+
+	if (!add_header && hdr->groups > 1) {
+		pr_err("Internal error: No header but multiple groups\n");
+		return -EINVAL;
+	}
+
+	/* ignore indicates the input stream needed to be padded */
+	hdr->ignore = cpu_to_be16(ignore);
+	if (ignore)
+		pr_debug("marked %d bytes as ignore\n", ignore);
+
+	if (add_header)
+		ret = nx842_crypto_add_header(hdr, dst);
+	if (ret)
+		return ret;
+
+	*dlen = p.ototal;
+
+	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
+
+	return 0;
+}
+
+static int decompress(struct nx842_crypto_ctx *ctx,
+		      struct nx842_crypto_param *p,
+		      struct nx842_crypto_header_group *g,
+		      struct nx842_constraints *c,
+		      u16 ignore,
+		      bool usehw)
+{
+	unsigned int slen = be32_to_cpu(g->compressed_length);
+	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
+	unsigned int dlen = p->oremain, tmplen;
+	unsigned int adj_slen = slen;
+	u8 *src = p->in, *dst = p->out;
+	u16 padding = be16_to_cpu(g->padding);
+	int ret, spadding = 0, dpadding = 0;
+	ktime_t timeout;
+
+	if (!slen || !required_len)
+		return -EINVAL;
+
+	if (p->iremain <= 0 || padding + slen > p->iremain)
+		return -EOVERFLOW;
+
+	if (p->oremain <= 0 || required_len - ignore > p->oremain)
+		return -ENOSPC;
+
+	src += padding;
+
+	if (!usehw)
+		goto usesw;
+
+	if (slen % c->multiple)
+		adj_slen = round_up(slen, c->multiple);
+	if (slen < c->minimum)
+		adj_slen = c->minimum;
+	if (slen > c->maximum)
+		goto usesw;
+	if (slen < adj_slen || (u64)src % c->alignment) {
+		/* we can append padding bytes because the 842 format defines
+		 * an "end" template (see lib/842/842_decompress.c) and will
+		 * ignore any bytes following it.
+		 */
+		if (slen < adj_slen)
+			memset(ctx->sbounce + slen, 0, adj_slen - slen);
+		memcpy(ctx->sbounce, src, slen);
+		src = ctx->sbounce;
+		spadding = adj_slen - slen;
+		slen = adj_slen;
+		pr_debug("using decomp sbounce buffer, len %x\n", slen);
+	}
+
+	if (dlen % c->multiple)
+		dlen = round_down(dlen, c->multiple);
+	if (dlen < required_len || (u64)dst % c->alignment) {
+		dst = ctx->dbounce;
+		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
+		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
+	}
+	if (dlen < c->minimum)
+		goto usesw;
+	if (dlen > c->maximum)
+		dlen = c->maximum;
+
+	tmplen = dlen;
+	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
+	do {
+		dlen = tmplen; /* reset dlen, if we're retrying */
+		ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem);
+	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+	if (ret) {
+usesw:
+		/* reset everything, sw doesn't have constraints */
+		src = p->in + padding;
+		slen = be32_to_cpu(g->compressed_length);
+		spadding = 0;
+		dst = p->out;
+		dlen = p->oremain;
+		dpadding = 0;
+		if (dlen < required_len) { /* have ignore bytes */
+			dst = ctx->dbounce;
+			dlen = BOUNCE_BUFFER_SIZE;
+		}
+		pr_info_ratelimited("using software 842 decompression\n");
+		ret = sw842_decompress(src, slen, dst, &dlen);
+	}
+	if (ret)
+		return ret;
+
+	slen -= spadding;
+
+	dlen -= ignore;
+	if (ignore)
+		pr_debug("ignoring last %x bytes\n", ignore);
+
+	if (dst == ctx->dbounce)
+		memcpy(p->out, dst, dlen);
+
+	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
+		 slen, padding, dlen, ignore);
+
+	return update_param(p, slen + padding, dlen);
+}
+
+static int nx842_crypto_decompress(struct crypto_tfm *tfm,
+				   const u8 *src, unsigned int slen,
+				   u8 *dst, unsigned int *dlen)
+{
+	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct nx842_crypto_header *hdr;
+	struct nx842_crypto_param p;
+	struct nx842_constraints c;
+	int n, ret, hdr_len;
+	u16 ignore = 0;
+	bool usehw = true;
+
+	p.in = (u8 *)src;
+	p.iremain = slen;
+	p.out = dst;
+	p.oremain = *dlen;
+	p.ototal = 0;
+
+	*dlen = 0;
+
+	if (read_constraints(&c))
+		usehw = false;
+
+	hdr = (struct nx842_crypto_header *)src;
+
+	/* If it doesn't start with our header magic number, assume it's a raw
+	 * 842 compressed buffer and pass it directly to the hardware driver
+	 */
+	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
+		struct nx842_crypto_header_group g = {
+			.padding =		0,
+			.compressed_length =	cpu_to_be32(p.iremain),
+			.uncompressed_length =	cpu_to_be32(p.oremain),
+		};
+
+		ret = decompress(ctx, &p, &g, &c, 0, usehw);
+		if (ret)
+			return ret;
+
+		*dlen = p.ototal;
+
+		return 0;
+	}
+
+	if (!hdr->groups) {
+		pr_err("header has no groups\n");
+		return -EINVAL;
+	}
+	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
+		pr_err("header has too many groups %x, max %x\n",
+		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
+		return -EINVAL;
+	}
+
+	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
+	if (hdr_len > slen)
+		return -EOVERFLOW;
+
+	memcpy(&ctx->header, src, hdr_len);
+	hdr = &ctx->header;
+
+	for (n = 0; n < hdr->groups; n++) {
+		/* ignore applies to last group */
+		if (n + 1 == hdr->groups)
+			ignore = be16_to_cpu(hdr->ignore);
+
+		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw);
+		if (ret)
+			return ret;
+	}
+
+	*dlen = p.ototal;
+
+	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
+
+	return 0;
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "842",
+	.cra_driver_name	= "842-nx",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= nx842_crypto_init,
+	.cra_exit		= nx842_crypto_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= nx842_crypto_compress,
+	.coa_decompress		= nx842_crypto_decompress } }
+};
+
+static int __init nx842_crypto_mod_init(void)
+{
+	return crypto_register_alg(&alg);
+}
+module_init(nx842_crypto_mod_init);
+
+static void __exit nx842_crypto_mod_exit(void)
+{
+	crypto_unregister_alg(&alg);
+}
+module_exit(nx842_crypto_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface");
+MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-nx");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
new file mode 100644
index 0000000..6a9fb8b
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -0,0 +1,625 @@
+/*
+ * Driver for IBM PowerNV 842 compression accelerator
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "nx-842.h"
+
+#include <linux/timer.h>
+
+#include <asm/prom.h>
+#include <asm/icswx.h>
+
+#define MODULE_NAME NX842_POWERNV_MODULE_NAME
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
+
+#define WORKMEM_ALIGN	(CRB_ALIGN)
+#define CSB_WAIT_MAX	(5000) /* ms */
+
+struct nx842_workmem {
+	/* Below fields must be properly aligned */
+	struct coprocessor_request_block crb; /* CRB_ALIGN align */
+	struct data_descriptor_entry ddl_in[DDL_LEN_MAX]; /* DDE_ALIGN align */
+	struct data_descriptor_entry ddl_out[DDL_LEN_MAX]; /* DDE_ALIGN align */
+	/* Above fields must be properly aligned */
+
+	ktime_t start;
+
+	char padding[WORKMEM_ALIGN]; /* unused, to allow alignment */
+} __packed __aligned(WORKMEM_ALIGN);
+
+struct nx842_coproc {
+	unsigned int chip_id;
+	unsigned int ct;
+	unsigned int ci;
+	struct list_head list;
+};
+
+/* no cpu hotplug on powernv, so this list never changes after init */
+static LIST_HEAD(nx842_coprocs);
+static unsigned int nx842_ct;
+
+/**
+ * setup_indirect_dde - Setup an indirect DDE
+ *
+ * The DDE is setup with the the DDE count, byte count, and address of
+ * first direct DDE in the list.
+ */
+static void setup_indirect_dde(struct data_descriptor_entry *dde,
+			       struct data_descriptor_entry *ddl,
+			       unsigned int dde_count, unsigned int byte_count)
+{
+	dde->flags = 0;
+	dde->count = dde_count;
+	dde->index = 0;
+	dde->length = cpu_to_be32(byte_count);
+	dde->address = cpu_to_be64(nx842_get_pa(ddl));
+}
+
+/**
+ * setup_direct_dde - Setup single DDE from buffer
+ *
+ * The DDE is setup with the buffer and length.  The buffer must be properly
+ * aligned.  The used length is returned.
+ * Returns:
+ *   N    Successfully set up DDE with N bytes
+ */
+static unsigned int setup_direct_dde(struct data_descriptor_entry *dde,
+				     unsigned long pa, unsigned int len)
+{
+	unsigned int l = min_t(unsigned int, len, LEN_ON_PAGE(pa));
+
+	dde->flags = 0;
+	dde->count = 0;
+	dde->index = 0;
+	dde->length = cpu_to_be32(l);
+	dde->address = cpu_to_be64(pa);
+
+	return l;
+}
+
+/**
+ * setup_ddl - Setup DDL from buffer
+ *
+ * Returns:
+ *   0		Successfully set up DDL
+ */
+static int setup_ddl(struct data_descriptor_entry *dde,
+		     struct data_descriptor_entry *ddl,
+		     unsigned char *buf, unsigned int len,
+		     bool in)
+{
+	unsigned long pa = nx842_get_pa(buf);
+	int i, ret, total_len = len;
+
+	if (!IS_ALIGNED(pa, DDE_BUFFER_ALIGN)) {
+		pr_debug("%s buffer pa 0x%lx not 0x%x-byte aligned\n",
+			 in ? "input" : "output", pa, DDE_BUFFER_ALIGN);
+		return -EINVAL;
+	}
+
+	/* only need to check last mult; since buffer must be
+	 * DDE_BUFFER_ALIGN aligned, and that is a multiple of
+	 * DDE_BUFFER_SIZE_MULT, and pre-last page DDE buffers
+	 * are guaranteed a multiple of DDE_BUFFER_SIZE_MULT.
+	 */
+	if (len % DDE_BUFFER_LAST_MULT) {
+		pr_debug("%s buffer len 0x%x not a multiple of 0x%x\n",
+			 in ? "input" : "output", len, DDE_BUFFER_LAST_MULT);
+		if (in)
+			return -EINVAL;
+		len = round_down(len, DDE_BUFFER_LAST_MULT);
+	}
+
+	/* use a single direct DDE */
+	if (len <= LEN_ON_PAGE(pa)) {
+		ret = setup_direct_dde(dde, pa, len);
+		WARN_ON(ret < len);
+		return 0;
+	}
+
+	/* use the DDL */
+	for (i = 0; i < DDL_LEN_MAX && len > 0; i++) {
+		ret = setup_direct_dde(&ddl[i], pa, len);
+		buf += ret;
+		len -= ret;
+		pa = nx842_get_pa(buf);
+	}
+
+	if (len > 0) {
+		pr_debug("0x%x total %s bytes 0x%x too many for DDL.\n",
+			 total_len, in ? "input" : "output", len);
+		if (in)
+			return -EMSGSIZE;
+		total_len -= len;
+	}
+	setup_indirect_dde(dde, ddl, i, total_len);
+
+	return 0;
+}
+
+#define CSB_ERR(csb, msg, ...)					\
+	pr_err("ERROR: " msg " : %02x %02x %02x %02x %08x\n",	\
+	       ##__VA_ARGS__, (csb)->flags,			\
+	       (csb)->cs, (csb)->cc, (csb)->ce,			\
+	       be32_to_cpu((csb)->count))
+
+#define CSB_ERR_ADDR(csb, msg, ...)				\
+	CSB_ERR(csb, msg " at %lx", ##__VA_ARGS__,		\
+		(unsigned long)be64_to_cpu((csb)->address))
+
+/**
+ * wait_for_csb
+ */
+static int wait_for_csb(struct nx842_workmem *wmem,
+			struct coprocessor_status_block *csb)
+{
+	ktime_t start = wmem->start, now = ktime_get();
+	ktime_t timeout = ktime_add_ms(start, CSB_WAIT_MAX);
+
+	while (!(ACCESS_ONCE(csb->flags) & CSB_V)) {
+		cpu_relax();
+		now = ktime_get();
+		if (ktime_after(now, timeout))
+			break;
+	}
+
+	/* hw has updated csb and output buffer */
+	barrier();
+
+	/* check CSB flags */
+	if (!(csb->flags & CSB_V)) {
+		CSB_ERR(csb, "CSB still not valid after %ld us, giving up",
+			(long)ktime_us_delta(now, start));
+		return -ETIMEDOUT;
+	}
+	if (csb->flags & CSB_F) {
+		CSB_ERR(csb, "Invalid CSB format");
+		return -EPROTO;
+	}
+	if (csb->flags & CSB_CH) {
+		CSB_ERR(csb, "Invalid CSB chaining state");
+		return -EPROTO;
+	}
+
+	/* verify CSB completion sequence is 0 */
+	if (csb->cs) {
+		CSB_ERR(csb, "Invalid CSB completion sequence");
+		return -EPROTO;
+	}
+
+	/* check CSB Completion Code */
+	switch (csb->cc) {
+	/* no error */
+	case CSB_CC_SUCCESS:
+		break;
+	case CSB_CC_TPBC_GT_SPBC:
+		/* not an error, but the compressed data is
+		 * larger than the uncompressed data :(
+		 */
+		break;
+
+	/* input data errors */
+	case CSB_CC_OPERAND_OVERLAP:
+		/* input and output buffers overlap */
+		CSB_ERR(csb, "Operand Overlap error");
+		return -EINVAL;
+	case CSB_CC_INVALID_OPERAND:
+		CSB_ERR(csb, "Invalid operand");
+		return -EINVAL;
+	case CSB_CC_NOSPC:
+		/* output buffer too small */
+		return -ENOSPC;
+	case CSB_CC_ABORT:
+		CSB_ERR(csb, "Function aborted");
+		return -EINTR;
+	case CSB_CC_CRC_MISMATCH:
+		CSB_ERR(csb, "CRC mismatch");
+		return -EINVAL;
+	case CSB_CC_TEMPL_INVALID:
+		CSB_ERR(csb, "Compressed data template invalid");
+		return -EINVAL;
+	case CSB_CC_TEMPL_OVERFLOW:
+		CSB_ERR(csb, "Compressed data template shows data past end");
+		return -EINVAL;
+
+	/* these should not happen */
+	case CSB_CC_INVALID_ALIGN:
+		/* setup_ddl should have detected this */
+		CSB_ERR_ADDR(csb, "Invalid alignment");
+		return -EINVAL;
+	case CSB_CC_DATA_LENGTH:
+		/* setup_ddl should have detected this */
+		CSB_ERR(csb, "Invalid data length");
+		return -EINVAL;
+	case CSB_CC_WR_TRANSLATION:
+	case CSB_CC_TRANSLATION:
+	case CSB_CC_TRANSLATION_DUP1:
+	case CSB_CC_TRANSLATION_DUP2:
+	case CSB_CC_TRANSLATION_DUP3:
+	case CSB_CC_TRANSLATION_DUP4:
+	case CSB_CC_TRANSLATION_DUP5:
+	case CSB_CC_TRANSLATION_DUP6:
+		/* should not happen, we use physical addrs */
+		CSB_ERR_ADDR(csb, "Translation error");
+		return -EPROTO;
+	case CSB_CC_WR_PROTECTION:
+	case CSB_CC_PROTECTION:
+	case CSB_CC_PROTECTION_DUP1:
+	case CSB_CC_PROTECTION_DUP2:
+	case CSB_CC_PROTECTION_DUP3:
+	case CSB_CC_PROTECTION_DUP4:
+	case CSB_CC_PROTECTION_DUP5:
+	case CSB_CC_PROTECTION_DUP6:
+		/* should not happen, we use physical addrs */
+		CSB_ERR_ADDR(csb, "Protection error");
+		return -EPROTO;
+	case CSB_CC_PRIVILEGE:
+		/* shouldn't happen, we're in HYP mode */
+		CSB_ERR(csb, "Insufficient Privilege error");
+		return -EPROTO;
+	case CSB_CC_EXCESSIVE_DDE:
+		/* shouldn't happen, setup_ddl doesn't use many dde's */
+		CSB_ERR(csb, "Too many DDEs in DDL");
+		return -EINVAL;
+	case CSB_CC_TRANSPORT:
+		/* shouldn't happen, we setup CRB correctly */
+		CSB_ERR(csb, "Invalid CRB");
+		return -EINVAL;
+	case CSB_CC_SEGMENTED_DDL:
+		/* shouldn't happen, setup_ddl creates DDL right */
+		CSB_ERR(csb, "Segmented DDL error");
+		return -EINVAL;
+	case CSB_CC_DDE_OVERFLOW:
+		/* shouldn't happen, setup_ddl creates DDL right */
+		CSB_ERR(csb, "DDE overflow error");
+		return -EINVAL;
+	case CSB_CC_SESSION:
+		/* should not happen with ICSWX */
+		CSB_ERR(csb, "Session violation error");
+		return -EPROTO;
+	case CSB_CC_CHAIN:
+		/* should not happen, we don't use chained CRBs */
+		CSB_ERR(csb, "Chained CRB error");
+		return -EPROTO;
+	case CSB_CC_SEQUENCE:
+		/* should not happen, we don't use chained CRBs */
+		CSB_ERR(csb, "CRB seqeunce number error");
+		return -EPROTO;
+	case CSB_CC_UNKNOWN_CODE:
+		CSB_ERR(csb, "Unknown subfunction code");
+		return -EPROTO;
+
+	/* hardware errors */
+	case CSB_CC_RD_EXTERNAL:
+	case CSB_CC_RD_EXTERNAL_DUP1:
+	case CSB_CC_RD_EXTERNAL_DUP2:
+	case CSB_CC_RD_EXTERNAL_DUP3:
+		CSB_ERR_ADDR(csb, "Read error outside coprocessor");
+		return -EPROTO;
+	case CSB_CC_WR_EXTERNAL:
+		CSB_ERR_ADDR(csb, "Write error outside coprocessor");
+		return -EPROTO;
+	case CSB_CC_INTERNAL:
+		CSB_ERR(csb, "Internal error in coprocessor");
+		return -EPROTO;
+	case CSB_CC_PROVISION:
+		CSB_ERR(csb, "Storage provision error");
+		return -EPROTO;
+	case CSB_CC_HW:
+		CSB_ERR(csb, "Correctable hardware error");
+		return -EPROTO;
+
+	default:
+		CSB_ERR(csb, "Invalid CC %d", csb->cc);
+		return -EPROTO;
+	}
+
+	/* check Completion Extension state */
+	if (csb->ce & CSB_CE_TERMINATION) {
+		CSB_ERR(csb, "CSB request was terminated");
+		return -EPROTO;
+	}
+	if (csb->ce & CSB_CE_INCOMPLETE) {
+		CSB_ERR(csb, "CSB request not complete");
+		return -EPROTO;
+	}
+	if (!(csb->ce & CSB_CE_TPBC)) {
+		CSB_ERR(csb, "TPBC not provided, unknown target length");
+		return -EPROTO;
+	}
+
+	/* successful completion */
+	pr_debug_ratelimited("Processed %u bytes in %lu us\n", csb->count,
+			     (unsigned long)ktime_us_delta(now, start));
+
+	return 0;
+}
+
+/**
+ * nx842_powernv_function - compress/decompress data using the 842 algorithm
+ *
+ * (De)compression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * This compresses or decompresses the provided input buffer into the provided
+ * output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * output data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * The @workmem buffer should only be used by one function call at a time.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS
+ * @fc: function code, see CCW Function Codes in nx-842.h
+ *
+ * Returns:
+ *   0		Success, output of length @outlenp stored in the buffer at @out
+ *   -ENODEV	Hardware unavailable
+ *   -ENOSPC	Output buffer is to small
+ *   -EMSGSIZE	Input buffer too large
+ *   -EINVAL	buffer constraints do not fix nx842_constraints
+ *   -EPROTO	hardware error during operation
+ *   -ETIMEDOUT	hardware did not complete operation in reasonable time
+ *   -EINTR	operation was aborted
+ */
+static int nx842_powernv_function(const unsigned char *in, unsigned int inlen,
+				  unsigned char *out, unsigned int *outlenp,
+				  void *workmem, int fc)
+{
+	struct coprocessor_request_block *crb;
+	struct coprocessor_status_block *csb;
+	struct nx842_workmem *wmem;
+	int ret;
+	u64 csb_addr;
+	u32 ccw;
+	unsigned int outlen = *outlenp;
+
+	wmem = PTR_ALIGN(workmem, WORKMEM_ALIGN);
+
+	*outlenp = 0;
+
+	/* shoudn't happen, we don't load without a coproc */
+	if (!nx842_ct) {
+		pr_err_ratelimited("coprocessor CT is 0");
+		return -ENODEV;
+	}
+
+	crb = &wmem->crb;
+	csb = &crb->csb;
+
+	/* Clear any previous values */
+	memset(crb, 0, sizeof(*crb));
+
+	/* set up DDLs */
+	ret = setup_ddl(&crb->source, wmem->ddl_in,
+			(unsigned char *)in, inlen, true);
+	if (ret)
+		return ret;
+	ret = setup_ddl(&crb->target, wmem->ddl_out,
+			out, outlen, false);
+	if (ret)
+		return ret;
+
+	/* set up CCW */
+	ccw = 0;
+	ccw = SET_FIELD(ccw, CCW_CT, nx842_ct);
+	ccw = SET_FIELD(ccw, CCW_CI_842, 0); /* use 0 for hw auto-selection */
+	ccw = SET_FIELD(ccw, CCW_FC_842, fc);
+
+	/* set up CRB's CSB addr */
+	csb_addr = nx842_get_pa(csb) & CRB_CSB_ADDRESS;
+	csb_addr |= CRB_CSB_AT; /* Addrs are phys */
+	crb->csb_addr = cpu_to_be64(csb_addr);
+
+	wmem->start = ktime_get();
+
+	/* do ICSWX */
+	ret = icswx(cpu_to_be32(ccw), crb);
+
+	pr_debug_ratelimited("icswx CR %x ccw %x crb->ccw %x\n", ret,
+			     (unsigned int)ccw,
+			     (unsigned int)be32_to_cpu(crb->ccw));
+
+	switch (ret) {
+	case ICSWX_INITIATED:
+		ret = wait_for_csb(wmem, csb);
+		break;
+	case ICSWX_BUSY:
+		pr_debug_ratelimited("842 Coprocessor busy\n");
+		ret = -EBUSY;
+		break;
+	case ICSWX_REJECTED:
+		pr_err_ratelimited("ICSWX rejected\n");
+		ret = -EPROTO;
+		break;
+	default:
+		pr_err_ratelimited("Invalid ICSWX return code %x\n", ret);
+		ret = -EPROTO;
+		break;
+	}
+
+	if (!ret)
+		*outlenp = be32_to_cpu(csb->count);
+
+	return ret;
+}
+
+/**
+ * nx842_powernv_compress - Compress data using the 842 algorithm
+ *
+ * Compression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * The input buffer is compressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * compressed data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS
+ *
+ * Returns: see @nx842_powernv_function()
+ */
+static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
+				  unsigned char *out, unsigned int *outlenp,
+				  void *wmem)
+{
+	return nx842_powernv_function(in, inlen, out, outlenp,
+				      wmem, CCW_FC_842_COMP_NOCRC);
+}
+
+/**
+ * nx842_powernv_decompress - Decompress data using the 842 algorithm
+ *
+ * Decompression provided by the NX842 coprocessor on IBM PowerNV systems.
+ * The input buffer is decompressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * decompressed data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: input buffer pointer
+ * @inlen: input buffer size
+ * @out: output buffer pointer
+ * @outlenp: output buffer size pointer
+ * @workmem: working memory buffer pointer, must be at least NX842_MEM_COMPRESS
+ *
+ * Returns: see @nx842_powernv_function()
+ */
+static int nx842_powernv_decompress(const unsigned char *in, unsigned int inlen,
+				    unsigned char *out, unsigned int *outlenp,
+				    void *wmem)
+{
+	return nx842_powernv_function(in, inlen, out, outlenp,
+				      wmem, CCW_FC_842_DECOMP_NOCRC);
+}
+
+static int __init nx842_powernv_probe(struct device_node *dn)
+{
+	struct nx842_coproc *coproc;
+	struct property *ct_prop, *ci_prop;
+	unsigned int ct, ci;
+	int chip_id;
+
+	chip_id = of_get_ibm_chip_id(dn);
+	if (chip_id < 0) {
+		pr_err("ibm,chip-id missing\n");
+		return -EINVAL;
+	}
+	ct_prop = of_find_property(dn, "ibm,842-coprocessor-type", NULL);
+	if (!ct_prop) {
+		pr_err("ibm,842-coprocessor-type missing\n");
+		return -EINVAL;
+	}
+	ct = be32_to_cpu(*(unsigned int *)ct_prop->value);
+	ci_prop = of_find_property(dn, "ibm,842-coprocessor-instance", NULL);
+	if (!ci_prop) {
+		pr_err("ibm,842-coprocessor-instance missing\n");
+		return -EINVAL;
+	}
+	ci = be32_to_cpu(*(unsigned int *)ci_prop->value);
+
+	coproc = kmalloc(sizeof(*coproc), GFP_KERNEL);
+	if (!coproc)
+		return -ENOMEM;
+
+	coproc->chip_id = chip_id;
+	coproc->ct = ct;
+	coproc->ci = ci;
+	INIT_LIST_HEAD(&coproc->list);
+	list_add(&coproc->list, &nx842_coprocs);
+
+	pr_info("coprocessor found on chip %d, CT %d CI %d\n", chip_id, ct, ci);
+
+	if (!nx842_ct)
+		nx842_ct = ct;
+	else if (nx842_ct != ct)
+		pr_err("NX842 chip %d, CT %d != first found CT %d\n",
+		       chip_id, ct, nx842_ct);
+
+	return 0;
+}
+
+static struct nx842_constraints nx842_powernv_constraints = {
+	.alignment =	DDE_BUFFER_ALIGN,
+	.multiple =	DDE_BUFFER_LAST_MULT,
+	.minimum =	DDE_BUFFER_LAST_MULT,
+	.maximum =	(DDL_LEN_MAX - 1) * PAGE_SIZE,
+};
+
+static struct nx842_driver nx842_powernv_driver = {
+	.owner =	THIS_MODULE,
+	.constraints =	&nx842_powernv_constraints,
+	.compress =	nx842_powernv_compress,
+	.decompress =	nx842_powernv_decompress,
+};
+
+static __init int nx842_powernv_init(void)
+{
+	struct device_node *dn;
+
+	/* verify workmem size/align restrictions */
+	BUILD_BUG_ON(sizeof(struct nx842_workmem) > NX842_MEM_COMPRESS);
+	BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
+	BUILD_BUG_ON(CRB_ALIGN % DDE_ALIGN);
+	BUILD_BUG_ON(CRB_SIZE % DDE_ALIGN);
+	/* verify buffer size/align restrictions */
+	BUILD_BUG_ON(PAGE_SIZE % DDE_BUFFER_ALIGN);
+	BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
+	BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
+
+	pr_info("loading\n");
+
+	for_each_compatible_node(dn, NULL, NX842_POWERNV_COMPAT_NAME)
+		nx842_powernv_probe(dn);
+
+	if (!nx842_ct) {
+		pr_err("no coprocessors found\n");
+		return -ENODEV;
+	}
+
+	nx842_register_driver(&nx842_powernv_driver);
+
+	pr_info("loaded\n");
+
+	return 0;
+}
+module_init(nx842_powernv_init);
+
+static void __exit nx842_powernv_exit(void)
+{
+	struct nx842_coproc *coproc, *n;
+
+	nx842_unregister_driver(&nx842_powernv_driver);
+
+	list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
+		list_del(&coproc->list);
+		kfree(coproc);
+	}
+
+	pr_info("unloaded\n");
+}
+module_exit(nx842_powernv_exit);
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
new file mode 100644
index 0000000..85837e9
--- /dev/null
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -0,0 +1,1128 @@
+/*
+ * Driver for IBM Power 842 compression accelerator
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *          Seth Jennings <sjenning@linux.vnet.ibm.com>
+ */
+
+#include <asm/vio.h>
+
+#include "nx-842.h"
+#include "nx_csbcpb.h" /* struct nx_csbcpb */
+
+#define MODULE_NAME NX842_PSERIES_MODULE_NAME
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+
+/* IO buffer must be 128 byte aligned */
+#define IO_BUFFER_ALIGN 128
+
+static struct nx842_constraints nx842_pseries_constraints = {
+	.alignment =	IO_BUFFER_ALIGN,
+	.multiple =	DDE_BUFFER_LAST_MULT,
+	.minimum =	IO_BUFFER_ALIGN,
+	.maximum =	PAGE_SIZE, /* dynamic, max_sync_size */
+};
+
+static int check_constraints(unsigned long buf, unsigned int *len, bool in)
+{
+	if (!IS_ALIGNED(buf, nx842_pseries_constraints.alignment)) {
+		pr_debug("%s buffer 0x%lx not aligned to 0x%x\n",
+			 in ? "input" : "output", buf,
+			 nx842_pseries_constraints.alignment);
+		return -EINVAL;
+	}
+	if (*len % nx842_pseries_constraints.multiple) {
+		pr_debug("%s buffer len 0x%x not multiple of 0x%x\n",
+			 in ? "input" : "output", *len,
+			 nx842_pseries_constraints.multiple);
+		if (in)
+			return -EINVAL;
+		*len = round_down(*len, nx842_pseries_constraints.multiple);
+	}
+	if (*len < nx842_pseries_constraints.minimum) {
+		pr_debug("%s buffer len 0x%x under minimum 0x%x\n",
+			 in ? "input" : "output", *len,
+			 nx842_pseries_constraints.minimum);
+		return -EINVAL;
+	}
+	if (*len > nx842_pseries_constraints.maximum) {
+		pr_debug("%s buffer len 0x%x over maximum 0x%x\n",
+			 in ? "input" : "output", *len,
+			 nx842_pseries_constraints.maximum);
+		if (in)
+			return -EINVAL;
+		*len = nx842_pseries_constraints.maximum;
+	}
+	return 0;
+}
+
+/* I assume we need to align the CSB? */
+#define WORKMEM_ALIGN	(256)
+
+struct nx842_workmem {
+	/* scatterlist */
+	char slin[4096];
+	char slout[4096];
+	/* coprocessor status/parameter block */
+	struct nx_csbcpb csbcpb;
+
+	char padding[WORKMEM_ALIGN];
+} __aligned(WORKMEM_ALIGN);
+
+/* Macros for fields within nx_csbcpb */
+/* Check the valid bit within the csbcpb valid field */
+#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
+
+/* CE macros operate on the completion_extension field bits in the csbcpb.
+ * CE0 0=full completion, 1=partial completion
+ * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
+ * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
+#define NX842_CSBCPB_CE0(x)	(x & BIT_MASK(7))
+#define NX842_CSBCPB_CE1(x)	(x & BIT_MASK(6))
+#define NX842_CSBCPB_CE2(x)	(x & BIT_MASK(5))
+
+/* The NX unit accepts data only on 4K page boundaries */
+#define NX842_HW_PAGE_SIZE	(4096)
+#define NX842_HW_PAGE_MASK	(~(NX842_HW_PAGE_SIZE-1))
+
+enum nx842_status {
+	UNAVAILABLE,
+	AVAILABLE
+};
+
+struct ibm_nx842_counters {
+	atomic64_t comp_complete;
+	atomic64_t comp_failed;
+	atomic64_t decomp_complete;
+	atomic64_t decomp_failed;
+	atomic64_t swdecomp;
+	atomic64_t comp_times[32];
+	atomic64_t decomp_times[32];
+};
+
+static struct nx842_devdata {
+	struct vio_dev *vdev;
+	struct device *dev;
+	struct ibm_nx842_counters *counters;
+	unsigned int max_sg_len;
+	unsigned int max_sync_size;
+	unsigned int max_sync_sg;
+	enum nx842_status status;
+} __rcu *devdata;
+static DEFINE_SPINLOCK(devdata_mutex);
+
+#define NX842_COUNTER_INC(_x) \
+static inline void nx842_inc_##_x( \
+	const struct nx842_devdata *dev) { \
+	if (dev) \
+		atomic64_inc(&dev->counters->_x); \
+}
+NX842_COUNTER_INC(comp_complete);
+NX842_COUNTER_INC(comp_failed);
+NX842_COUNTER_INC(decomp_complete);
+NX842_COUNTER_INC(decomp_failed);
+NX842_COUNTER_INC(swdecomp);
+
+#define NX842_HIST_SLOTS 16
+
+static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
+{
+	int bucket = fls(time);
+
+	if (bucket)
+		bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
+
+	atomic64_inc(&times[bucket]);
+}
+
+/* NX unit operation flags */
+#define NX842_OP_COMPRESS	0x0
+#define NX842_OP_CRC		0x1
+#define NX842_OP_DECOMPRESS	0x2
+#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
+#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
+#define NX842_OP_ASYNC		(1<<23)
+#define NX842_OP_NOTIFY		(1<<22)
+#define NX842_OP_NOTIFY_INT(x)	((x & 0xff)<<8)
+
+static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
+{
+	/* No use of DMA mappings within the driver. */
+	return 0;
+}
+
+struct nx842_slentry {
+	unsigned long ptr; /* Real address (use __pa()) */
+	unsigned long len;
+};
+
+/* pHyp scatterlist entry */
+struct nx842_scatterlist {
+	int entry_nr; /* number of slentries */
+	struct nx842_slentry *entries; /* ptr to array of slentries */
+};
+
+/* Does not include sizeof(entry_nr) in the size */
+static inline unsigned long nx842_get_scatterlist_size(
+				struct nx842_scatterlist *sl)
+{
+	return sl->entry_nr * sizeof(struct nx842_slentry);
+}
+
+static int nx842_build_scatterlist(unsigned long buf, int len,
+			struct nx842_scatterlist *sl)
+{
+	unsigned long nextpage;
+	struct nx842_slentry *entry;
+
+	sl->entry_nr = 0;
+
+	entry = sl->entries;
+	while (len) {
+		entry->ptr = nx842_get_pa((void *)buf);
+		nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
+		if (nextpage < buf + len) {
+			/* we aren't at the end yet */
+			if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
+				/* we are in the middle (or beginning) */
+				entry->len = NX842_HW_PAGE_SIZE;
+			else
+				/* we are at the beginning */
+				entry->len = nextpage - buf;
+		} else {
+			/* at the end */
+			entry->len = len;
+		}
+
+		len -= entry->len;
+		buf += entry->len;
+		sl->entry_nr++;
+		entry++;
+	}
+
+	return 0;
+}
+
+static int nx842_validate_result(struct device *dev,
+	struct cop_status_block *csb)
+{
+	/* The csb must be valid after returning from vio_h_cop_sync */
+	if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
+		dev_err(dev, "%s: cspcbp not valid upon completion.\n",
+				__func__);
+		dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
+				csb->valid,
+				csb->crb_seq_number,
+				csb->completion_code,
+				csb->completion_extension);
+		dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
+				csb->processed_byte_count,
+				(unsigned long)csb->address);
+		return -EIO;
+	}
+
+	/* Check return values from the hardware in the CSB */
+	switch (csb->completion_code) {
+	case 0:	/* Completed without error */
+		break;
+	case 64: /* Target bytes > Source bytes during compression */
+	case 13: /* Output buffer too small */
+		dev_dbg(dev, "%s: Compression output larger than input\n",
+					__func__);
+		return -ENOSPC;
+	case 66: /* Input data contains an illegal template field */
+	case 67: /* Template indicates data past the end of the input stream */
+		dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
+					__func__, csb->completion_code);
+		return -EINVAL;
+	default:
+		dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
+					__func__, csb->completion_code);
+		return -EIO;
+	}
+
+	/* Hardware sanity check */
+	if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
+		dev_err(dev, "%s: No error returned by hardware, but "
+				"data returned is unusable, contact support.\n"
+				"(Additional info: csbcbp->processed bytes "
+				"does not specify processed bytes for the "
+				"target buffer.)\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * nx842_pseries_compress - Compress data using the 842 algorithm
+ *
+ * Compression provide by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is compressed and the result is stored in the
+ * provided output buffer.
+ *
+ * Upon return from this function @outlen contains the length of the
+ * compressed data.  If there is an error then @outlen will be 0 and an
+ * error will be specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer
+ * @inlen: Length of input buffer
+ * @out: Pointer to output buffer
+ * @outlen: Length of output buffer
+ * @wrkmem: ptr to buffer for working memory, size determined by
+ *          NX842_MEM_COMPRESS
+ *
+ * Returns:
+ *   0		Success, output of length @outlen stored in the buffer at @out
+ *   -ENOMEM	Unable to allocate internal buffers
+ *   -ENOSPC	Output buffer is to small
+ *   -EIO	Internal error
+ *   -ENODEV	Hardware unavailable
+ */
+static int nx842_pseries_compress(const unsigned char *in, unsigned int inlen,
+				  unsigned char *out, unsigned int *outlen,
+				  void *wmem)
+{
+	struct nx842_devdata *local_devdata;
+	struct device *dev = NULL;
+	struct nx842_workmem *workmem;
+	struct nx842_scatterlist slin, slout;
+	struct nx_csbcpb *csbcpb;
+	int ret = 0, max_sync_size;
+	unsigned long inbuf, outbuf;
+	struct vio_pfo_op op = {
+		.done = NULL,
+		.handle = 0,
+		.timeout = 0,
+	};
+	unsigned long start = get_tb();
+
+	inbuf = (unsigned long)in;
+	if (check_constraints(inbuf, &inlen, true))
+		return -EINVAL;
+
+	outbuf = (unsigned long)out;
+	if (check_constraints(outbuf, outlen, false))
+		return -EINVAL;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (!local_devdata || !local_devdata->dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+	max_sync_size = local_devdata->max_sync_size;
+	dev = local_devdata->dev;
+
+	/* Init scatterlist */
+	workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
+	slin.entries = (struct nx842_slentry *)workmem->slin;
+	slout.entries = (struct nx842_slentry *)workmem->slout;
+
+	/* Init operation */
+	op.flags = NX842_OP_COMPRESS;
+	csbcpb = &workmem->csbcpb;
+	memset(csbcpb, 0, sizeof(*csbcpb));
+	op.csbcpb = nx842_get_pa(csbcpb);
+	op.out = nx842_get_pa(slout.entries);
+
+	if ((inbuf & NX842_HW_PAGE_MASK) ==
+	    ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
+		/* Create direct DDE */
+		op.in = nx842_get_pa((void *)inbuf);
+		op.inlen = inlen;
+	} else {
+		/* Create indirect DDE (scatterlist) */
+		nx842_build_scatterlist(inbuf, inlen, &slin);
+		op.in = nx842_get_pa(slin.entries);
+		op.inlen = -nx842_get_scatterlist_size(&slin);
+	}
+
+	if ((outbuf & NX842_HW_PAGE_MASK) ==
+	    ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
+		/* Create direct DDE */
+		op.out = nx842_get_pa((void *)outbuf);
+		op.outlen = *outlen;
+	} else {
+		/* Create indirect DDE (scatterlist) */
+		nx842_build_scatterlist(outbuf, *outlen, &slout);
+		op.out = nx842_get_pa(slout.entries);
+		op.outlen = -nx842_get_scatterlist_size(&slout);
+	}
+
+	/* Send request to pHyp */
+	ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+	/* Check for pHyp error */
+	if (ret) {
+		dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+			__func__, ret, op.hcall_err);
+		ret = -EIO;
+		goto unlock;
+	}
+
+	/* Check for hardware error */
+	ret = nx842_validate_result(dev, &csbcpb->csb);
+	if (ret)
+		goto unlock;
+
+	*outlen = csbcpb->csb.processed_byte_count;
+	dev_dbg(dev, "%s: processed_bytes=%d\n", __func__, *outlen);
+
+unlock:
+	if (ret)
+		nx842_inc_comp_failed(local_devdata);
+	else {
+		nx842_inc_comp_complete(local_devdata);
+		ibm_nx842_incr_hist(local_devdata->counters->comp_times,
+			(get_tb() - start) / tb_ticks_per_usec);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * nx842_pseries_decompress - Decompress data using the 842 algorithm
+ *
+ * Decompression provide by the NX842 coprocessor on IBM Power systems.
+ * The input buffer is decompressed and the result is stored in the
+ * provided output buffer.  The size allocated to the output buffer is
+ * provided by the caller of this function in @outlen.  Upon return from
+ * this function @outlen contains the length of the decompressed data.
+ * If there is an error then @outlen will be 0 and an error will be
+ * specified by the return code from this function.
+ *
+ * @in: Pointer to input buffer
+ * @inlen: Length of input buffer
+ * @out: Pointer to output buffer
+ * @outlen: Length of output buffer
+ * @wrkmem: ptr to buffer for working memory, size determined by
+ *          NX842_MEM_COMPRESS
+ *
+ * Returns:
+ *   0		Success, output of length @outlen stored in the buffer at @out
+ *   -ENODEV	Hardware decompression device is unavailable
+ *   -ENOMEM	Unable to allocate internal buffers
+ *   -ENOSPC	Output buffer is to small
+ *   -EINVAL	Bad input data encountered when attempting decompress
+ *   -EIO	Internal error
+ */
+static int nx842_pseries_decompress(const unsigned char *in, unsigned int inlen,
+				    unsigned char *out, unsigned int *outlen,
+				    void *wmem)
+{
+	struct nx842_devdata *local_devdata;
+	struct device *dev = NULL;
+	struct nx842_workmem *workmem;
+	struct nx842_scatterlist slin, slout;
+	struct nx_csbcpb *csbcpb;
+	int ret = 0, max_sync_size;
+	unsigned long inbuf, outbuf;
+	struct vio_pfo_op op = {
+		.done = NULL,
+		.handle = 0,
+		.timeout = 0,
+	};
+	unsigned long start = get_tb();
+
+	/* Ensure page alignment and size */
+	inbuf = (unsigned long)in;
+	if (check_constraints(inbuf, &inlen, true))
+		return -EINVAL;
+
+	outbuf = (unsigned long)out;
+	if (check_constraints(outbuf, outlen, false))
+		return -EINVAL;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (!local_devdata || !local_devdata->dev) {
+		rcu_read_unlock();
+		return -ENODEV;
+	}
+	max_sync_size = local_devdata->max_sync_size;
+	dev = local_devdata->dev;
+
+	workmem = PTR_ALIGN(wmem, WORKMEM_ALIGN);
+
+	/* Init scatterlist */
+	slin.entries = (struct nx842_slentry *)workmem->slin;
+	slout.entries = (struct nx842_slentry *)workmem->slout;
+
+	/* Init operation */
+	op.flags = NX842_OP_DECOMPRESS;
+	csbcpb = &workmem->csbcpb;
+	memset(csbcpb, 0, sizeof(*csbcpb));
+	op.csbcpb = nx842_get_pa(csbcpb);
+
+	if ((inbuf & NX842_HW_PAGE_MASK) ==
+	    ((inbuf + inlen - 1) & NX842_HW_PAGE_MASK)) {
+		/* Create direct DDE */
+		op.in = nx842_get_pa((void *)inbuf);
+		op.inlen = inlen;
+	} else {
+		/* Create indirect DDE (scatterlist) */
+		nx842_build_scatterlist(inbuf, inlen, &slin);
+		op.in = nx842_get_pa(slin.entries);
+		op.inlen = -nx842_get_scatterlist_size(&slin);
+	}
+
+	if ((outbuf & NX842_HW_PAGE_MASK) ==
+	    ((outbuf + *outlen - 1) & NX842_HW_PAGE_MASK)) {
+		/* Create direct DDE */
+		op.out = nx842_get_pa((void *)outbuf);
+		op.outlen = *outlen;
+	} else {
+		/* Create indirect DDE (scatterlist) */
+		nx842_build_scatterlist(outbuf, *outlen, &slout);
+		op.out = nx842_get_pa(slout.entries);
+		op.outlen = -nx842_get_scatterlist_size(&slout);
+	}
+
+	/* Send request to pHyp */
+	ret = vio_h_cop_sync(local_devdata->vdev, &op);
+
+	/* Check for pHyp error */
+	if (ret) {
+		dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
+			__func__, ret, op.hcall_err);
+		goto unlock;
+	}
+
+	/* Check for hardware error */
+	ret = nx842_validate_result(dev, &csbcpb->csb);
+	if (ret)
+		goto unlock;
+
+	*outlen = csbcpb->csb.processed_byte_count;
+
+unlock:
+	if (ret)
+		/* decompress fail */
+		nx842_inc_decomp_failed(local_devdata);
+	else {
+		nx842_inc_decomp_complete(local_devdata);
+		ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
+			(get_tb() - start) / tb_ticks_per_usec);
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * nx842_OF_set_defaults -- Set default (disabled) values for devdata
+ *
+ * @devdata - struct nx842_devdata to update
+ *
+ * Returns:
+ *  0 on success
+ *  -ENOENT if @devdata ptr is NULL
+ */
+static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
+{
+	if (devdata) {
+		devdata->max_sync_size = 0;
+		devdata->max_sync_sg = 0;
+		devdata->max_sg_len = 0;
+		devdata->status = UNAVAILABLE;
+		return 0;
+	} else
+		return -ENOENT;
+}
+
+/**
+ * nx842_OF_upd_status -- Update the device info from OF status prop
+ *
+ * The status property indicates if the accelerator is enabled.  If the
+ * device is in the OF tree it indicates that the hardware is present.
+ * The status field indicates if the device is enabled when the status
+ * is 'okay'.  Otherwise the device driver will be disabled.
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 - Device is available
+ *  -EINVAL - Device is not available
+ */
+static int nx842_OF_upd_status(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const char *status = (const char *)prop->value;
+
+	if (!strncmp(status, "okay", (size_t)prop->length)) {
+		devdata->status = AVAILABLE;
+	} else {
+		dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
+				__func__, status);
+		devdata->status = UNAVAILABLE;
+	}
+
+	return ret;
+}
+
+/**
+ * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
+ *
+ * Definition of the 'ibm,max-sg-len' OF property:
+ *  This field indicates the maximum byte length of a scatter list
+ *  for the platform facility. It is a single cell encoded as with encode-int.
+ *
+ * Example:
+ *  # od -x ibm,max-sg-len
+ *  0000000 0000 0ff0
+ *
+ *  In this example, the maximum byte length of a scatter list is
+ *  0x0ff0 (4,080).
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const int *maxsglen = prop->value;
+
+	if (prop->length != sizeof(*maxsglen)) {
+		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
+		dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
+				prop->length, sizeof(*maxsglen));
+		ret = -EINVAL;
+	} else {
+		devdata->max_sg_len = (unsigned int)min(*maxsglen,
+				(int)NX842_HW_PAGE_SIZE);
+	}
+
+	return ret;
+}
+
+/**
+ * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
+ *
+ * Definition of the 'ibm,max-sync-cop' OF property:
+ *  Two series of cells.  The first series of cells represents the maximums
+ *  that can be synchronously compressed. The second series of cells
+ *  represents the maximums that can be synchronously decompressed.
+ *  1. The first cell in each series contains the count of the number of
+ *     data length, scatter list elements pairs that follow – each being
+ *     of the form
+ *    a. One cell data byte length
+ *    b. One cell total number of scatter list elements
+ *
+ * Example:
+ *  # od -x ibm,max-sync-cop
+ *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
+ *  0000020 0000 1000 0000 01fe
+ *
+ *  In this example, compression supports 0x1000 (4,096) data byte length
+ *  and 0x1fe (510) total scatter list elements.  Decompression supports
+ *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
+ *  elements.
+ *
+ * @devdata - struct nx842_devdata to update
+ * @prop - struct property point containing the maxsyncop for the update
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL on failure
+ */
+static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
+					struct property *prop) {
+	int ret = 0;
+	const struct maxsynccop_t {
+		int comp_elements;
+		int comp_data_limit;
+		int comp_sg_limit;
+		int decomp_elements;
+		int decomp_data_limit;
+		int decomp_sg_limit;
+	} *maxsynccop;
+
+	if (prop->length != sizeof(*maxsynccop)) {
+		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
+		dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
+				sizeof(*maxsynccop));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	maxsynccop = (const struct maxsynccop_t *)prop->value;
+
+	/* Use one limit rather than separate limits for compression and
+	 * decompression. Set a maximum for this so as not to exceed the
+	 * size that the header can support and round the value down to
+	 * the hardware page size (4K) */
+	devdata->max_sync_size =
+			(unsigned int)min(maxsynccop->comp_data_limit,
+					maxsynccop->decomp_data_limit);
+
+	devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
+					65536);
+
+	if (devdata->max_sync_size < 4096) {
+		dev_err(devdata->dev, "%s: hardware max data size (%u) is "
+				"less than the driver minimum, unable to use "
+				"the hardware device\n",
+				__func__, devdata->max_sync_size);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	nx842_pseries_constraints.maximum = devdata->max_sync_size;
+
+	devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
+						maxsynccop->decomp_sg_limit);
+	if (devdata->max_sync_sg < 1) {
+		dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
+				"less than the driver minimum, unable to use "
+				"the hardware device\n",
+				__func__, devdata->max_sync_sg);
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	return ret;
+}
+
+/**
+ *
+ * nx842_OF_upd -- Handle OF properties updates for the device.
+ *
+ * Set all properties from the OF tree.  Optionally, a new property
+ * can be provided by the @new_prop pointer to overwrite an existing value.
+ * The device will remain disabled until all values are valid, this function
+ * will return an error for updates unless all values are valid.
+ *
+ * @new_prop: If not NULL, this property is being updated.  If NULL, update
+ *  all properties from the current values in the OF tree.
+ *
+ * Returns:
+ *  0 - Success
+ *  -ENOMEM - Could not allocate memory for new devdata structure
+ *  -EINVAL - property value not found, new_prop is not a recognized
+ *	property for the device or property value is not valid.
+ *  -ENODEV - Device is not available
+ */
+static int nx842_OF_upd(struct property *new_prop)
+{
+	struct nx842_devdata *old_devdata = NULL;
+	struct nx842_devdata *new_devdata = NULL;
+	struct device_node *of_node = NULL;
+	struct property *status = NULL;
+	struct property *maxsglen = NULL;
+	struct property *maxsyncop = NULL;
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	if (old_devdata)
+		of_node = old_devdata->dev->of_node;
+
+	if (!old_devdata || !of_node) {
+		pr_err("%s: device is not available\n", __func__);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+		return -ENODEV;
+	}
+
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+	if (!new_devdata) {
+		dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
+		ret = -ENOMEM;
+		goto error_out;
+	}
+
+	memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
+	new_devdata->counters = old_devdata->counters;
+
+	/* Set ptrs for existing properties */
+	status = of_find_property(of_node, "status", NULL);
+	maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
+	maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
+	if (!status || !maxsglen || !maxsyncop) {
+		dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
+		ret = -EINVAL;
+		goto error_out;
+	}
+
+	/*
+	 * If this is a property update, there are only certain properties that
+	 * we care about. Bail if it isn't in the below list
+	 */
+	if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
+		         strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
+		goto out;
+
+	/* Perform property updates */
+	ret = nx842_OF_upd_status(new_devdata, status);
+	if (ret)
+		goto error_out;
+
+	ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
+	if (ret)
+		goto error_out;
+
+	ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
+	if (ret)
+		goto error_out;
+
+out:
+	dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
+			__func__, new_devdata->max_sync_size,
+			old_devdata->max_sync_size);
+	dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
+			__func__, new_devdata->max_sync_sg,
+			old_devdata->max_sync_sg);
+	dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
+			__func__, new_devdata->max_sg_len,
+			old_devdata->max_sg_len);
+
+	rcu_assign_pointer(devdata, new_devdata);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	dev_set_drvdata(new_devdata->dev, new_devdata);
+	kfree(old_devdata);
+	return 0;
+
+error_out:
+	if (new_devdata) {
+		dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
+		nx842_OF_set_defaults(new_devdata);
+		rcu_assign_pointer(devdata, new_devdata);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+		synchronize_rcu();
+		dev_set_drvdata(new_devdata->dev, new_devdata);
+		kfree(old_devdata);
+	} else {
+		dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
+		spin_unlock_irqrestore(&devdata_mutex, flags);
+	}
+
+	if (!ret)
+		ret = -EINVAL;
+	return ret;
+}
+
+/**
+ * nx842_OF_notifier - Process updates to OF properties for the device
+ *
+ * @np: notifier block
+ * @action: notifier action
+ * @update: struct pSeries_reconfig_prop_update pointer if action is
+ *	PSERIES_UPDATE_PROPERTY
+ *
+ * Returns:
+ *	NOTIFY_OK on success
+ *	NOTIFY_BAD encoded with error number on failure, use
+ *		notifier_to_errno() to decode this value
+ */
+static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
+			     void *data)
+{
+	struct of_reconfig_data *upd = data;
+	struct nx842_devdata *local_devdata;
+	struct device_node *node = NULL;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (local_devdata)
+		node = local_devdata->dev->of_node;
+
+	if (local_devdata &&
+			action == OF_RECONFIG_UPDATE_PROPERTY &&
+			!strcmp(upd->dn->name, node->name)) {
+		rcu_read_unlock();
+		nx842_OF_upd(upd->prop);
+	} else
+		rcu_read_unlock();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block nx842_of_nb = {
+	.notifier_call = nx842_OF_notifier,
+};
+
+#define nx842_counter_read(_name)					\
+static ssize_t nx842_##_name##_show(struct device *dev,		\
+		struct device_attribute *attr,				\
+		char *buf) {						\
+	struct nx842_devdata *local_devdata;			\
+	int p = 0;							\
+	rcu_read_lock();						\
+	local_devdata = rcu_dereference(devdata);			\
+	if (local_devdata)						\
+		p = snprintf(buf, PAGE_SIZE, "%ld\n",			\
+		       atomic64_read(&local_devdata->counters->_name));	\
+	rcu_read_unlock();						\
+	return p;							\
+}
+
+#define NX842DEV_COUNTER_ATTR_RO(_name)					\
+	nx842_counter_read(_name);					\
+	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
+						0444,			\
+						nx842_##_name##_show,\
+						NULL);
+
+NX842DEV_COUNTER_ATTR_RO(comp_complete);
+NX842DEV_COUNTER_ATTR_RO(comp_failed);
+NX842DEV_COUNTER_ATTR_RO(decomp_complete);
+NX842DEV_COUNTER_ATTR_RO(decomp_failed);
+NX842DEV_COUNTER_ATTR_RO(swdecomp);
+
+static ssize_t nx842_timehist_show(struct device *,
+		struct device_attribute *, char *);
+
+static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
+		nx842_timehist_show, NULL);
+static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
+		0444, nx842_timehist_show, NULL);
+
+static ssize_t nx842_timehist_show(struct device *dev,
+		struct device_attribute *attr, char *buf) {
+	char *p = buf;
+	struct nx842_devdata *local_devdata;
+	atomic64_t *times;
+	int bytes_remain = PAGE_SIZE;
+	int bytes;
+	int i;
+
+	rcu_read_lock();
+	local_devdata = rcu_dereference(devdata);
+	if (!local_devdata) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	if (attr == &dev_attr_comp_times)
+		times = local_devdata->counters->comp_times;
+	else if (attr == &dev_attr_decomp_times)
+		times = local_devdata->counters->decomp_times;
+	else {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
+		bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
+			       i ? (2<<(i-1)) : 0, (2<<i)-1,
+			       atomic64_read(&times[i]));
+		bytes_remain -= bytes;
+		p += bytes;
+	}
+	/* The last bucket holds everything over
+	 * 2<<(NX842_HIST_SLOTS - 2) us */
+	bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
+			2<<(NX842_HIST_SLOTS - 2),
+			atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
+	p += bytes;
+
+	rcu_read_unlock();
+	return p - buf;
+}
+
+static struct attribute *nx842_sysfs_entries[] = {
+	&dev_attr_comp_complete.attr,
+	&dev_attr_comp_failed.attr,
+	&dev_attr_decomp_complete.attr,
+	&dev_attr_decomp_failed.attr,
+	&dev_attr_swdecomp.attr,
+	&dev_attr_comp_times.attr,
+	&dev_attr_decomp_times.attr,
+	NULL,
+};
+
+static struct attribute_group nx842_attribute_group = {
+	.name = NULL,		/* put in device directory */
+	.attrs = nx842_sysfs_entries,
+};
+
+static struct nx842_driver nx842_pseries_driver = {
+	.owner =	THIS_MODULE,
+	.constraints =	&nx842_pseries_constraints,
+	.compress =	nx842_pseries_compress,
+	.decompress =	nx842_pseries_decompress,
+};
+
+static int __init nx842_probe(struct vio_dev *viodev,
+				  const struct vio_device_id *id)
+{
+	struct nx842_devdata *old_devdata, *new_devdata = NULL;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+
+	if (old_devdata && old_devdata->vdev != NULL) {
+		dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
+		ret = -1;
+		goto error_unlock;
+	}
+
+	dev_set_drvdata(&viodev->dev, NULL);
+
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+	if (!new_devdata) {
+		dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
+		ret = -ENOMEM;
+		goto error_unlock;
+	}
+
+	new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
+			GFP_NOFS);
+	if (!new_devdata->counters) {
+		dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
+		ret = -ENOMEM;
+		goto error_unlock;
+	}
+
+	new_devdata->vdev = viodev;
+	new_devdata->dev = &viodev->dev;
+	nx842_OF_set_defaults(new_devdata);
+
+	rcu_assign_pointer(devdata, new_devdata);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	kfree(old_devdata);
+
+	of_reconfig_notifier_register(&nx842_of_nb);
+
+	ret = nx842_OF_upd(NULL);
+	if (ret && ret != -ENODEV) {
+		dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
+		ret = -1;
+		goto error;
+	}
+
+	rcu_read_lock();
+	dev_set_drvdata(&viodev->dev, rcu_dereference(devdata));
+	rcu_read_unlock();
+
+	if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
+		dev_err(&viodev->dev, "could not create sysfs device attributes\n");
+		ret = -1;
+		goto error;
+	}
+
+	nx842_register_driver(&nx842_pseries_driver);
+
+	return 0;
+
+error_unlock:
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	if (new_devdata)
+		kfree(new_devdata->counters);
+	kfree(new_devdata);
+error:
+	return ret;
+}
+
+static int __exit nx842_remove(struct vio_dev *viodev)
+{
+	struct nx842_devdata *old_devdata;
+	unsigned long flags;
+
+	pr_info("Removing IBM Power 842 compression device\n");
+	sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
+
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	of_reconfig_notifier_unregister(&nx842_of_nb);
+	RCU_INIT_POINTER(devdata, NULL);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	dev_set_drvdata(&viodev->dev, NULL);
+	if (old_devdata)
+		kfree(old_devdata->counters);
+	kfree(old_devdata);
+
+	nx842_unregister_driver(&nx842_pseries_driver);
+
+	return 0;
+}
+
+static struct vio_device_id nx842_vio_driver_ids[] = {
+	{NX842_PSERIES_COMPAT_NAME "-v1", NX842_PSERIES_COMPAT_NAME},
+	{"", ""},
+};
+
+static struct vio_driver nx842_vio_driver = {
+	.name = MODULE_NAME,
+	.probe = nx842_probe,
+	.remove = __exit_p(nx842_remove),
+	.get_desired_dma = nx842_get_desired_dma,
+	.id_table = nx842_vio_driver_ids,
+};
+
+static int __init nx842_init(void)
+{
+	struct nx842_devdata *new_devdata;
+	pr_info("Registering IBM Power 842 compression driver\n");
+
+	BUILD_BUG_ON(sizeof(struct nx842_workmem) > NX842_MEM_COMPRESS);
+
+	RCU_INIT_POINTER(devdata, NULL);
+	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
+	if (!new_devdata) {
+		pr_err("Could not allocate memory for device data\n");
+		return -ENOMEM;
+	}
+	new_devdata->status = UNAVAILABLE;
+	RCU_INIT_POINTER(devdata, new_devdata);
+
+	return vio_register_driver(&nx842_vio_driver);
+}
+
+module_init(nx842_init);
+
+static void __exit nx842_exit(void)
+{
+	struct nx842_devdata *old_devdata;
+	unsigned long flags;
+
+	pr_info("Exiting IBM Power 842 compression driver\n");
+	spin_lock_irqsave(&devdata_mutex, flags);
+	old_devdata = rcu_dereference_check(devdata,
+			lockdep_is_held(&devdata_mutex));
+	RCU_INIT_POINTER(devdata, NULL);
+	spin_unlock_irqrestore(&devdata_mutex, flags);
+	synchronize_rcu();
+	if (old_devdata && old_devdata->dev)
+		dev_set_drvdata(old_devdata->dev, NULL);
+	kfree(old_devdata);
+	nx842_unregister_driver(&nx842_pseries_driver);
+	vio_unregister_driver(&nx842_vio_driver);
+}
+
+module_exit(nx842_exit);
+
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 887196e..bf2823c 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -1,5 +1,10 @@
 /*
- * Driver for IBM Power 842 compression accelerator
+ * Driver frontend for IBM Power 842 compression accelerator
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * Designer of the Power data compression engine:
+ *   Bulent Abali <abali@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -10,1594 +15,170 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
- *          Seth Jennings <sjenning@linux.vnet.ibm.com>
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/nx842.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-
-#include <asm/page.h>
-#include <asm/vio.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include "nx_csbcpb.h" /* struct nx_csbcpb */
+#include "nx-842.h"
 
 #define MODULE_NAME "nx-compress"
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
 
-#define SHIFT_4K 12
-#define SHIFT_64K 16
-#define SIZE_4K (1UL << SHIFT_4K)
-#define SIZE_64K (1UL << SHIFT_64K)
-
-/* IO buffer must be 128 byte aligned */
-#define IO_BUFFER_ALIGN 128
-
-struct nx842_header {
-	int blocks_nr; /* number of compressed blocks */
-	int offset; /* offset of the first block (from beginning of header) */
-	int sizes[0]; /* size of compressed blocks */
-};
-
-static inline int nx842_header_size(const struct nx842_header *hdr)
-{
-	return sizeof(struct nx842_header) +
-			hdr->blocks_nr * sizeof(hdr->sizes[0]);
-}
-
-/* Macros for fields within nx_csbcpb */
-/* Check the valid bit within the csbcpb valid field */
-#define NX842_CSBCBP_VALID_CHK(x) (x & BIT_MASK(7))
-
-/* CE macros operate on the completion_extension field bits in the csbcpb.
- * CE0 0=full completion, 1=partial completion
- * CE1 0=CE0 indicates completion, 1=termination (output may be modified)
- * CE2 0=processed_bytes is source bytes, 1=processed_bytes is target bytes */
-#define NX842_CSBCPB_CE0(x)	(x & BIT_MASK(7))
-#define NX842_CSBCPB_CE1(x)	(x & BIT_MASK(6))
-#define NX842_CSBCPB_CE2(x)	(x & BIT_MASK(5))
-
-/* The NX unit accepts data only on 4K page boundaries */
-#define NX842_HW_PAGE_SHIFT	SHIFT_4K
-#define NX842_HW_PAGE_SIZE	(ASM_CONST(1) << NX842_HW_PAGE_SHIFT)
-#define NX842_HW_PAGE_MASK	(~(NX842_HW_PAGE_SIZE-1))
-
-enum nx842_status {
-	UNAVAILABLE,
-	AVAILABLE
-};
-
-struct ibm_nx842_counters {
-	atomic64_t comp_complete;
-	atomic64_t comp_failed;
-	atomic64_t decomp_complete;
-	atomic64_t decomp_failed;
-	atomic64_t swdecomp;
-	atomic64_t comp_times[32];
-	atomic64_t decomp_times[32];
-};
-
-static struct nx842_devdata {
-	struct vio_dev *vdev;
-	struct device *dev;
-	struct ibm_nx842_counters *counters;
-	unsigned int max_sg_len;
-	unsigned int max_sync_size;
-	unsigned int max_sync_sg;
-	enum nx842_status status;
-} __rcu *devdata;
-static DEFINE_SPINLOCK(devdata_mutex);
-
-#define NX842_COUNTER_INC(_x) \
-static inline void nx842_inc_##_x( \
-	const struct nx842_devdata *dev) { \
-	if (dev) \
-		atomic64_inc(&dev->counters->_x); \
-}
-NX842_COUNTER_INC(comp_complete);
-NX842_COUNTER_INC(comp_failed);
-NX842_COUNTER_INC(decomp_complete);
-NX842_COUNTER_INC(decomp_failed);
-NX842_COUNTER_INC(swdecomp);
-
-#define NX842_HIST_SLOTS 16
-
-static void ibm_nx842_incr_hist(atomic64_t *times, unsigned int time)
-{
-	int bucket = fls(time);
-
-	if (bucket)
-		bucket = min((NX842_HIST_SLOTS - 1), bucket - 1);
-
-	atomic64_inc(&times[bucket]);
-}
-
-/* NX unit operation flags */
-#define NX842_OP_COMPRESS	0x0
-#define NX842_OP_CRC		0x1
-#define NX842_OP_DECOMPRESS	0x2
-#define NX842_OP_COMPRESS_CRC   (NX842_OP_COMPRESS | NX842_OP_CRC)
-#define NX842_OP_DECOMPRESS_CRC (NX842_OP_DECOMPRESS | NX842_OP_CRC)
-#define NX842_OP_ASYNC		(1<<23)
-#define NX842_OP_NOTIFY		(1<<22)
-#define NX842_OP_NOTIFY_INT(x)	((x & 0xff)<<8)
+/* Only one driver is expected, based on the HW platform */
+static struct nx842_driver *nx842_driver;
+static DEFINE_SPINLOCK(nx842_driver_lock); /* protects driver pointers */
 
-static unsigned long nx842_get_desired_dma(struct vio_dev *viodev)
+void nx842_register_driver(struct nx842_driver *driver)
 {
-	/* No use of DMA mappings within the driver. */
-	return 0;
-}
-
-struct nx842_slentry {
-	unsigned long ptr; /* Real address (use __pa()) */
-	unsigned long len;
-};
-
-/* pHyp scatterlist entry */
-struct nx842_scatterlist {
-	int entry_nr; /* number of slentries */
-	struct nx842_slentry *entries; /* ptr to array of slentries */
-};
+	spin_lock(&nx842_driver_lock);
 
-/* Does not include sizeof(entry_nr) in the size */
-static inline unsigned long nx842_get_scatterlist_size(
-				struct nx842_scatterlist *sl)
-{
-	return sl->entry_nr * sizeof(struct nx842_slentry);
-}
-
-static inline unsigned long nx842_get_pa(void *addr)
-{
-	if (is_vmalloc_addr(addr))
-		return page_to_phys(vmalloc_to_page(addr))
-		       + offset_in_page(addr);
-	else
-		return __pa(addr);
-}
-
-static int nx842_build_scatterlist(unsigned long buf, int len,
-			struct nx842_scatterlist *sl)
-{
-	unsigned long nextpage;
-	struct nx842_slentry *entry;
-
-	sl->entry_nr = 0;
-
-	entry = sl->entries;
-	while (len) {
-		entry->ptr = nx842_get_pa((void *)buf);
-		nextpage = ALIGN(buf + 1, NX842_HW_PAGE_SIZE);
-		if (nextpage < buf + len) {
-			/* we aren't at the end yet */
-			if (IS_ALIGNED(buf, NX842_HW_PAGE_SIZE))
-				/* we are in the middle (or beginning) */
-				entry->len = NX842_HW_PAGE_SIZE;
-			else
-				/* we are at the beginning */
-				entry->len = nextpage - buf;
-		} else {
-			/* at the end */
-			entry->len = len;
-		}
-
-		len -= entry->len;
-		buf += entry->len;
-		sl->entry_nr++;
-		entry++;
+	if (nx842_driver) {
+		pr_err("can't register driver %s, already using driver %s\n",
+		       driver->owner->name, nx842_driver->owner->name);
+	} else {
+		pr_info("registering driver %s\n", driver->owner->name);
+		nx842_driver = driver;
 	}
 
-	return 0;
+	spin_unlock(&nx842_driver_lock);
 }
+EXPORT_SYMBOL_GPL(nx842_register_driver);
 
-/*
- * Working memory for software decompression
- */
-struct sw842_fifo {
-	union {
-		char f8[256][8];
-		char f4[512][4];
-	};
-	char f2[256][2];
-	unsigned char f84_full;
-	unsigned char f2_full;
-	unsigned char f8_count;
-	unsigned char f2_count;
-	unsigned int f4_count;
-};
-
-/*
- * Working memory for crypto API
- */
-struct nx842_workmem {
-	char bounce[PAGE_SIZE]; /* bounce buffer for decompression input */
-	union {
-		/* hardware working memory */
-		struct {
-			/* scatterlist */
-			char slin[SIZE_4K];
-			char slout[SIZE_4K];
-			/* coprocessor status/parameter block */
-			struct nx_csbcpb csbcpb;
-		};
-		/* software working memory */
-		struct sw842_fifo swfifo; /* software decompression fifo */
-	};
-};
-
-int nx842_get_workmem_size(void)
+void nx842_unregister_driver(struct nx842_driver *driver)
 {
-	return sizeof(struct nx842_workmem) + NX842_HW_PAGE_SIZE;
-}
-EXPORT_SYMBOL_GPL(nx842_get_workmem_size);
-
-int nx842_get_workmem_size_aligned(void)
-{
-	return sizeof(struct nx842_workmem);
-}
-EXPORT_SYMBOL_GPL(nx842_get_workmem_size_aligned);
+	spin_lock(&nx842_driver_lock);
 
-static int nx842_validate_result(struct device *dev,
-	struct cop_status_block *csb)
-{
-	/* The csb must be valid after returning from vio_h_cop_sync */
-	if (!NX842_CSBCBP_VALID_CHK(csb->valid)) {
-		dev_err(dev, "%s: cspcbp not valid upon completion.\n",
-				__func__);
-		dev_dbg(dev, "valid:0x%02x cs:0x%02x cc:0x%02x ce:0x%02x\n",
-				csb->valid,
-				csb->crb_seq_number,
-				csb->completion_code,
-				csb->completion_extension);
-		dev_dbg(dev, "processed_bytes:%d address:0x%016lx\n",
-				csb->processed_byte_count,
-				(unsigned long)csb->address);
-		return -EIO;
-	}
-
-	/* Check return values from the hardware in the CSB */
-	switch (csb->completion_code) {
-	case 0:	/* Completed without error */
-		break;
-	case 64: /* Target bytes > Source bytes during compression */
-	case 13: /* Output buffer too small */
-		dev_dbg(dev, "%s: Compression output larger than input\n",
-					__func__);
-		return -ENOSPC;
-	case 66: /* Input data contains an illegal template field */
-	case 67: /* Template indicates data past the end of the input stream */
-		dev_dbg(dev, "%s: Bad data for decompression (code:%d)\n",
-					__func__, csb->completion_code);
-		return -EINVAL;
-	default:
-		dev_dbg(dev, "%s: Unspecified error (code:%d)\n",
-					__func__, csb->completion_code);
-		return -EIO;
-	}
-
-	/* Hardware sanity check */
-	if (!NX842_CSBCPB_CE2(csb->completion_extension)) {
-		dev_err(dev, "%s: No error returned by hardware, but "
-				"data returned is unusable, contact support.\n"
-				"(Additional info: csbcbp->processed bytes "
-				"does not specify processed bytes for the "
-				"target buffer.)\n", __func__);
-		return -EIO;
+	if (nx842_driver == driver) {
+		pr_info("unregistering driver %s\n", driver->owner->name);
+		nx842_driver = NULL;
+	} else if (nx842_driver) {
+		pr_err("can't unregister driver %s, using driver %s\n",
+		       driver->owner->name, nx842_driver->owner->name);
+	} else {
+		pr_err("can't unregister driver %s, no driver in use\n",
+		       driver->owner->name);
 	}
 
-	return 0;
+	spin_unlock(&nx842_driver_lock);
 }
+EXPORT_SYMBOL_GPL(nx842_unregister_driver);
 
-/**
- * nx842_compress - Compress data using the 842 algorithm
- *
- * Compression provide by the NX842 coprocessor on IBM Power systems.
- * The input buffer is compressed and the result is stored in the
- * provided output buffer.
- *
- * Upon return from this function @outlen contains the length of the
- * compressed data.  If there is an error then @outlen will be 0 and an
- * error will be specified by the return code from this function.
- *
- * @in: Pointer to input buffer, must be page aligned
- * @inlen: Length of input buffer, must be PAGE_SIZE
- * @out: Pointer to output buffer
- * @outlen: Length of output buffer
- * @wrkmem: ptr to buffer for working memory, size determined by
- *          nx842_get_workmem_size()
- *
- * Returns:
- *   0		Success, output of length @outlen stored in the buffer at @out
- *   -ENOMEM	Unable to allocate internal buffers
- *   -ENOSPC	Output buffer is to small
- *   -EMSGSIZE	XXX Difficult to describe this limitation
- *   -EIO	Internal error
- *   -ENODEV	Hardware unavailable
- */
-int nx842_compress(const unsigned char *in, unsigned int inlen,
-		       unsigned char *out, unsigned int *outlen, void *wmem)
+static struct nx842_driver *get_driver(void)
 {
-	struct nx842_header *hdr;
-	struct nx842_devdata *local_devdata;
-	struct device *dev = NULL;
-	struct nx842_workmem *workmem;
-	struct nx842_scatterlist slin, slout;
-	struct nx_csbcpb *csbcpb;
-	int ret = 0, max_sync_size, i, bytesleft, size, hdrsize;
-	unsigned long inbuf, outbuf, padding;
-	struct vio_pfo_op op = {
-		.done = NULL,
-		.handle = 0,
-		.timeout = 0,
-	};
-	unsigned long start_time = get_tb();
-
-	/*
-	 * Make sure input buffer is 64k page aligned.  This is assumed since
-	 * this driver is designed for page compression only (for now).  This
-	 * is very nice since we can now use direct DDE(s) for the input and
-	 * the alignment is guaranteed.
-	*/
-	inbuf = (unsigned long)in;
-	if (!IS_ALIGNED(inbuf, PAGE_SIZE) || inlen != PAGE_SIZE)
-		return -EINVAL;
-
-	rcu_read_lock();
-	local_devdata = rcu_dereference(devdata);
-	if (!local_devdata || !local_devdata->dev) {
-		rcu_read_unlock();
-		return -ENODEV;
-	}
-	max_sync_size = local_devdata->max_sync_size;
-	dev = local_devdata->dev;
-
-	/* Create the header */
-	hdr = (struct nx842_header *)out;
-	hdr->blocks_nr = PAGE_SIZE / max_sync_size;
-	hdrsize = nx842_header_size(hdr);
-	outbuf = (unsigned long)out + hdrsize;
-	bytesleft = *outlen - hdrsize;
-
-	/* Init scatterlist */
-	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
-		NX842_HW_PAGE_SIZE);
-	slin.entries = (struct nx842_slentry *)workmem->slin;
-	slout.entries = (struct nx842_slentry *)workmem->slout;
-
-	/* Init operation */
-	op.flags = NX842_OP_COMPRESS;
-	csbcpb = &workmem->csbcpb;
-	memset(csbcpb, 0, sizeof(*csbcpb));
-	op.csbcpb = nx842_get_pa(csbcpb);
-	op.out = nx842_get_pa(slout.entries);
+	struct nx842_driver *driver = NULL;
 
-	for (i = 0; i < hdr->blocks_nr; i++) {
-		/*
-		 * Aligning the output blocks to 128 bytes does waste space,
-		 * but it prevents the need for bounce buffers and memory
-		 * copies.  It also simplifies the code a lot.  In the worst
-		 * case (64k page, 4k max_sync_size), you lose up to
-		 * (128*16)/64k = ~3% the compression factor. For 64k
-		 * max_sync_size, the loss would be at most 128/64k = ~0.2%.
-		 */
-		padding = ALIGN(outbuf, IO_BUFFER_ALIGN) - outbuf;
-		outbuf += padding;
-		bytesleft -= padding;
-		if (i == 0)
-			/* save offset into first block in header */
-			hdr->offset = padding + hdrsize;
+	spin_lock(&nx842_driver_lock);
 
-		if (bytesleft <= 0) {
-			ret = -ENOSPC;
-			goto unlock;
-		}
+	driver = nx842_driver;
 
-		/*
-		 * NOTE: If the default max_sync_size is changed from 4k
-		 * to 64k, remove the "likely" case below, since a
-		 * scatterlist will always be needed.
-		 */
-		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
-			/* Create direct DDE */
-			op.in = nx842_get_pa((void *)inbuf);
-			op.inlen = max_sync_size;
+	if (driver && !try_module_get(driver->owner))
+		driver = NULL;
 
-		} else {
-			/* Create indirect DDE (scatterlist) */
-			nx842_build_scatterlist(inbuf, max_sync_size, &slin);
-			op.in = nx842_get_pa(slin.entries);
-			op.inlen = -nx842_get_scatterlist_size(&slin);
-		}
+	spin_unlock(&nx842_driver_lock);
 
-		/*
-		 * If max_sync_size != NX842_HW_PAGE_SIZE, an indirect
-		 * DDE is required for the outbuf.
-		 * If max_sync_size == NX842_HW_PAGE_SIZE, outbuf must
-		 * also be page aligned (1 in 128/4k=32 chance) in order
-		 * to use a direct DDE.
-		 * This is unlikely, just use an indirect DDE always.
-		 */
-		nx842_build_scatterlist(outbuf,
-			min(bytesleft, max_sync_size), &slout);
-		/* op.out set before loop */
-		op.outlen = -nx842_get_scatterlist_size(&slout);
-
-		/* Send request to pHyp */
-		ret = vio_h_cop_sync(local_devdata->vdev, &op);
-
-		/* Check for pHyp error */
-		if (ret) {
-			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
-				__func__, ret, op.hcall_err);
-			ret = -EIO;
-			goto unlock;
-		}
-
-		/* Check for hardware error */
-		ret = nx842_validate_result(dev, &csbcpb->csb);
-		if (ret && ret != -ENOSPC)
-			goto unlock;
-
-		/* Handle incompressible data */
-		if (unlikely(ret == -ENOSPC)) {
-			if (bytesleft < max_sync_size) {
-				/*
-				 * Not enough space left in the output buffer
-				 * to store uncompressed block
-				 */
-				goto unlock;
-			} else {
-				/* Store incompressible block */
-				memcpy((void *)outbuf, (void *)inbuf,
-					max_sync_size);
-				hdr->sizes[i] = -max_sync_size;
-				outbuf += max_sync_size;
-				bytesleft -= max_sync_size;
-				/* Reset ret, incompressible data handled */
-				ret = 0;
-			}
-		} else {
-			/* Normal case, compression was successful */
-			size = csbcpb->csb.processed_byte_count;
-			dev_dbg(dev, "%s: processed_bytes=%d\n",
-				__func__, size);
-			hdr->sizes[i] = size;
-			outbuf += size;
-			bytesleft -= size;
-		}
-
-		inbuf += max_sync_size;
-	}
-
-	*outlen = (unsigned int)(outbuf - (unsigned long)out);
-
-unlock:
-	if (ret)
-		nx842_inc_comp_failed(local_devdata);
-	else {
-		nx842_inc_comp_complete(local_devdata);
-		ibm_nx842_incr_hist(local_devdata->counters->comp_times,
-			(get_tb() - start_time) / tb_ticks_per_usec);
-	}
-	rcu_read_unlock();
-	return ret;
+	return driver;
 }
-EXPORT_SYMBOL_GPL(nx842_compress);
-
-static int sw842_decompress(const unsigned char *, int, unsigned char *, int *,
-			const void *);
 
-/**
- * nx842_decompress - Decompress data using the 842 algorithm
- *
- * Decompression provide by the NX842 coprocessor on IBM Power systems.
- * The input buffer is decompressed and the result is stored in the
- * provided output buffer.  The size allocated to the output buffer is
- * provided by the caller of this function in @outlen.  Upon return from
- * this function @outlen contains the length of the decompressed data.
- * If there is an error then @outlen will be 0 and an error will be
- * specified by the return code from this function.
- *
- * @in: Pointer to input buffer, will use bounce buffer if not 128 byte
- *      aligned
- * @inlen: Length of input buffer
- * @out: Pointer to output buffer, must be page aligned
- * @outlen: Length of output buffer, must be PAGE_SIZE
- * @wrkmem: ptr to buffer for working memory, size determined by
- *          nx842_get_workmem_size()
- *
- * Returns:
- *   0		Success, output of length @outlen stored in the buffer at @out
- *   -ENODEV	Hardware decompression device is unavailable
- *   -ENOMEM	Unable to allocate internal buffers
- *   -ENOSPC	Output buffer is to small
- *   -EINVAL	Bad input data encountered when attempting decompress
- *   -EIO	Internal error
- */
-int nx842_decompress(const unsigned char *in, unsigned int inlen,
-			 unsigned char *out, unsigned int *outlen, void *wmem)
+static void put_driver(struct nx842_driver *driver)
 {
-	struct nx842_header *hdr;
-	struct nx842_devdata *local_devdata;
-	struct device *dev = NULL;
-	struct nx842_workmem *workmem;
-	struct nx842_scatterlist slin, slout;
-	struct nx_csbcpb *csbcpb;
-	int ret = 0, i, size, max_sync_size;
-	unsigned long inbuf, outbuf;
-	struct vio_pfo_op op = {
-		.done = NULL,
-		.handle = 0,
-		.timeout = 0,
-	};
-	unsigned long start_time = get_tb();
-
-	/* Ensure page alignment and size */
-	outbuf = (unsigned long)out;
-	if (!IS_ALIGNED(outbuf, PAGE_SIZE) || *outlen != PAGE_SIZE)
-		return -EINVAL;
-
-	rcu_read_lock();
-	local_devdata = rcu_dereference(devdata);
-	if (local_devdata)
-		dev = local_devdata->dev;
-
-	/* Get header */
-	hdr = (struct nx842_header *)in;
-
-	workmem = (struct nx842_workmem *)ALIGN((unsigned long)wmem,
-		NX842_HW_PAGE_SIZE);
-
-	inbuf = (unsigned long)in + hdr->offset;
-	if (likely(!IS_ALIGNED(inbuf, IO_BUFFER_ALIGN))) {
-		/* Copy block(s) into bounce buffer for alignment */
-		memcpy(workmem->bounce, in + hdr->offset, inlen - hdr->offset);
-		inbuf = (unsigned long)workmem->bounce;
-	}
-
-	/* Init scatterlist */
-	slin.entries = (struct nx842_slentry *)workmem->slin;
-	slout.entries = (struct nx842_slentry *)workmem->slout;
-
-	/* Init operation */
-	op.flags = NX842_OP_DECOMPRESS;
-	csbcpb = &workmem->csbcpb;
-	memset(csbcpb, 0, sizeof(*csbcpb));
-	op.csbcpb = nx842_get_pa(csbcpb);
-
-	/*
-	 * max_sync_size may have changed since compression,
-	 * so we can't read it from the device info. We need
-	 * to derive it from hdr->blocks_nr.
-	 */
-	max_sync_size = PAGE_SIZE / hdr->blocks_nr;
-
-	for (i = 0; i < hdr->blocks_nr; i++) {
-		/* Skip padding */
-		inbuf = ALIGN(inbuf, IO_BUFFER_ALIGN);
-
-		if (hdr->sizes[i] < 0) {
-			/* Negative sizes indicate uncompressed data blocks */
-			size = abs(hdr->sizes[i]);
-			memcpy((void *)outbuf, (void *)inbuf, size);
-			outbuf += size;
-			inbuf += size;
-			continue;
-		}
-
-		if (!dev)
-			goto sw;
-
-		/*
-		 * The better the compression, the more likely the "likely"
-		 * case becomes.
-		 */
-		if (likely((inbuf & NX842_HW_PAGE_MASK) ==
-			((inbuf + hdr->sizes[i] - 1) & NX842_HW_PAGE_MASK))) {
-			/* Create direct DDE */
-			op.in = nx842_get_pa((void *)inbuf);
-			op.inlen = hdr->sizes[i];
-		} else {
-			/* Create indirect DDE (scatterlist) */
-			nx842_build_scatterlist(inbuf, hdr->sizes[i] , &slin);
-			op.in = nx842_get_pa(slin.entries);
-			op.inlen = -nx842_get_scatterlist_size(&slin);
-		}
-
-		/*
-		 * NOTE: If the default max_sync_size is changed from 4k
-		 * to 64k, remove the "likely" case below, since a
-		 * scatterlist will always be needed.
-		 */
-		if (likely(max_sync_size == NX842_HW_PAGE_SIZE)) {
-			/* Create direct DDE */
-			op.out = nx842_get_pa((void *)outbuf);
-			op.outlen = max_sync_size;
-		} else {
-			/* Create indirect DDE (scatterlist) */
-			nx842_build_scatterlist(outbuf, max_sync_size, &slout);
-			op.out = nx842_get_pa(slout.entries);
-			op.outlen = -nx842_get_scatterlist_size(&slout);
-		}
-
-		/* Send request to pHyp */
-		ret = vio_h_cop_sync(local_devdata->vdev, &op);
-
-		/* Check for pHyp error */
-		if (ret) {
-			dev_dbg(dev, "%s: vio_h_cop_sync error (ret=%d, hret=%ld)\n",
-				__func__, ret, op.hcall_err);
-			dev = NULL;
-			goto sw;
-		}
-
-		/* Check for hardware error */
-		ret = nx842_validate_result(dev, &csbcpb->csb);
-		if (ret) {
-			dev = NULL;
-			goto sw;
-		}
-
-		/* HW decompression success */
-		inbuf += hdr->sizes[i];
-		outbuf += csbcpb->csb.processed_byte_count;
-		continue;
-
-sw:
-		/* software decompression */
-		size = max_sync_size;
-		ret = sw842_decompress(
-			(unsigned char *)inbuf, hdr->sizes[i],
-			(unsigned char *)outbuf, &size, wmem);
-		if (ret)
-			pr_debug("%s: sw842_decompress failed with %d\n",
-				__func__, ret);
-
-		if (ret) {
-			if (ret != -ENOSPC && ret != -EINVAL &&
-					ret != -EMSGSIZE)
-				ret = -EIO;
-			goto unlock;
-		}
-
-		/* SW decompression success */
-		inbuf += hdr->sizes[i];
-		outbuf += size;
-	}
-
-	*outlen = (unsigned int)(outbuf - (unsigned long)out);
-
-unlock:
-	if (ret)
-		/* decompress fail */
-		nx842_inc_decomp_failed(local_devdata);
-	else {
-		if (!dev)
-			/* software decompress */
-			nx842_inc_swdecomp(local_devdata);
-		nx842_inc_decomp_complete(local_devdata);
-		ibm_nx842_incr_hist(local_devdata->counters->decomp_times,
-			(get_tb() - start_time) / tb_ticks_per_usec);
-	}
-
-	rcu_read_unlock();
-	return ret;
+	module_put(driver->owner);
 }
-EXPORT_SYMBOL_GPL(nx842_decompress);
 
 /**
- * nx842_OF_set_defaults -- Set default (disabled) values for devdata
- *
- * @devdata - struct nx842_devdata to update
- *
- * Returns:
- *  0 on success
- *  -ENOENT if @devdata ptr is NULL
+ * nx842_constraints
+ *
+ * This provides the driver's constraints.  Different nx842 implementations
+ * may have varying requirements.  The constraints are:
+ *   @alignment:	All buffers should be aligned to this
+ *   @multiple:		All buffer lengths should be a multiple of this
+ *   @minimum:		Buffer lengths must not be less than this amount
+ *   @maximum:		Buffer lengths must not be more than this amount
+ *
+ * The constraints apply to all buffers and lengths, both input and output,
+ * for both compression and decompression, except for the minimum which
+ * only applies to compression input and decompression output; the
+ * compressed data can be less than the minimum constraint.  It can be
+ * assumed that compressed data will always adhere to the multiple
+ * constraint.
+ *
+ * The driver may succeed even if these constraints are violated;
+ * however the driver can return failure or suffer reduced performance
+ * if any constraint is not met.
  */
-static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
+int nx842_constraints(struct nx842_constraints *c)
 {
-	if (devdata) {
-		devdata->max_sync_size = 0;
-		devdata->max_sync_sg = 0;
-		devdata->max_sg_len = 0;
-		devdata->status = UNAVAILABLE;
-		return 0;
-	} else
-		return -ENOENT;
-}
-
-/**
- * nx842_OF_upd_status -- Update the device info from OF status prop
- *
- * The status property indicates if the accelerator is enabled.  If the
- * device is in the OF tree it indicates that the hardware is present.
- * The status field indicates if the device is enabled when the status
- * is 'okay'.  Otherwise the device driver will be disabled.
- *
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 - Device is available
- *  -EINVAL - Device is not available
- */
-static int nx842_OF_upd_status(struct nx842_devdata *devdata,
-					struct property *prop) {
+	struct nx842_driver *driver = get_driver();
 	int ret = 0;
-	const char *status = (const char *)prop->value;
-
-	if (!strncmp(status, "okay", (size_t)prop->length)) {
-		devdata->status = AVAILABLE;
-	} else {
-		dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
-				__func__, status);
-		devdata->status = UNAVAILABLE;
-	}
-
-	return ret;
-}
-
-/**
- * nx842_OF_upd_maxsglen -- Update the device info from OF maxsglen prop
- *
- * Definition of the 'ibm,max-sg-len' OF property:
- *  This field indicates the maximum byte length of a scatter list
- *  for the platform facility. It is a single cell encoded as with encode-int.
- *
- * Example:
- *  # od -x ibm,max-sg-len
- *  0000000 0000 0ff0
- *
- *  In this example, the maximum byte length of a scatter list is
- *  0x0ff0 (4,080).
- *
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 on success
- *  -EINVAL on failure
- */
-static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
-					struct property *prop) {
-	int ret = 0;
-	const int *maxsglen = prop->value;
-
-	if (prop->length != sizeof(*maxsglen)) {
-		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sg-len property\n", __func__);
-		dev_dbg(devdata->dev, "%s: ibm,max-sg-len is %d bytes long, expected %lu bytes\n", __func__,
-				prop->length, sizeof(*maxsglen));
-		ret = -EINVAL;
-	} else {
-		devdata->max_sg_len = (unsigned int)min(*maxsglen,
-				(int)NX842_HW_PAGE_SIZE);
-	}
 
-	return ret;
-}
-
-/**
- * nx842_OF_upd_maxsyncop -- Update the device info from OF maxsyncop prop
- *
- * Definition of the 'ibm,max-sync-cop' OF property:
- *  Two series of cells.  The first series of cells represents the maximums
- *  that can be synchronously compressed. The second series of cells
- *  represents the maximums that can be synchronously decompressed.
- *  1. The first cell in each series contains the count of the number of
- *     data length, scatter list elements pairs that follow – each being
- *     of the form
- *    a. One cell data byte length
- *    b. One cell total number of scatter list elements
- *
- * Example:
- *  # od -x ibm,max-sync-cop
- *  0000000 0000 0001 0000 1000 0000 01fe 0000 0001
- *  0000020 0000 1000 0000 01fe
- *
- *  In this example, compression supports 0x1000 (4,096) data byte length
- *  and 0x1fe (510) total scatter list elements.  Decompression supports
- *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
- *  elements.
- *
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
- *
- * Returns:
- *  0 on success
- *  -EINVAL on failure
- */
-static int nx842_OF_upd_maxsyncop(struct nx842_devdata *devdata,
-					struct property *prop) {
-	int ret = 0;
-	const struct maxsynccop_t {
-		int comp_elements;
-		int comp_data_limit;
-		int comp_sg_limit;
-		int decomp_elements;
-		int decomp_data_limit;
-		int decomp_sg_limit;
-	} *maxsynccop;
-
-	if (prop->length != sizeof(*maxsynccop)) {
-		dev_err(devdata->dev, "%s: unexpected format for ibm,max-sync-cop property\n", __func__);
-		dev_dbg(devdata->dev, "%s: ibm,max-sync-cop is %d bytes long, expected %lu bytes\n", __func__, prop->length,
-				sizeof(*maxsynccop));
-		ret = -EINVAL;
-		goto out;
-	}
-
-	maxsynccop = (const struct maxsynccop_t *)prop->value;
-
-	/* Use one limit rather than separate limits for compression and
-	 * decompression. Set a maximum for this so as not to exceed the
-	 * size that the header can support and round the value down to
-	 * the hardware page size (4K) */
-	devdata->max_sync_size =
-			(unsigned int)min(maxsynccop->comp_data_limit,
-					maxsynccop->decomp_data_limit);
+	if (!driver)
+		return -ENODEV;
 
-	devdata->max_sync_size = min_t(unsigned int, devdata->max_sync_size,
-					SIZE_64K);
+	BUG_ON(!c);
+	memcpy(c, driver->constraints, sizeof(*c));
 
-	if (devdata->max_sync_size < SIZE_4K) {
-		dev_err(devdata->dev, "%s: hardware max data size (%u) is "
-				"less than the driver minimum, unable to use "
-				"the hardware device\n",
-				__func__, devdata->max_sync_size);
-		ret = -EINVAL;
-		goto out;
-	}
+	put_driver(driver);
 
-	devdata->max_sync_sg = (unsigned int)min(maxsynccop->comp_sg_limit,
-						maxsynccop->decomp_sg_limit);
-	if (devdata->max_sync_sg < 1) {
-		dev_err(devdata->dev, "%s: hardware max sg size (%u) is "
-				"less than the driver minimum, unable to use "
-				"the hardware device\n",
-				__func__, devdata->max_sync_sg);
-		ret = -EINVAL;
-		goto out;
-	}
-
-out:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nx842_constraints);
 
-/**
- *
- * nx842_OF_upd -- Handle OF properties updates for the device.
- *
- * Set all properties from the OF tree.  Optionally, a new property
- * can be provided by the @new_prop pointer to overwrite an existing value.
- * The device will remain disabled until all values are valid, this function
- * will return an error for updates unless all values are valid.
- *
- * @new_prop: If not NULL, this property is being updated.  If NULL, update
- *  all properties from the current values in the OF tree.
- *
- * Returns:
- *  0 - Success
- *  -ENOMEM - Could not allocate memory for new devdata structure
- *  -EINVAL - property value not found, new_prop is not a recognized
- *	property for the device or property value is not valid.
- *  -ENODEV - Device is not available
- */
-static int nx842_OF_upd(struct property *new_prop)
+int nx842_compress(const unsigned char *in, unsigned int in_len,
+		   unsigned char *out, unsigned int *out_len,
+		   void *wrkmem)
 {
-	struct nx842_devdata *old_devdata = NULL;
-	struct nx842_devdata *new_devdata = NULL;
-	struct device_node *of_node = NULL;
-	struct property *status = NULL;
-	struct property *maxsglen = NULL;
-	struct property *maxsyncop = NULL;
-	int ret = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&devdata_mutex, flags);
-	old_devdata = rcu_dereference_check(devdata,
-			lockdep_is_held(&devdata_mutex));
-	if (old_devdata)
-		of_node = old_devdata->dev->of_node;
+	struct nx842_driver *driver = get_driver();
+	int ret;
 
-	if (!old_devdata || !of_node) {
-		pr_err("%s: device is not available\n", __func__);
-		spin_unlock_irqrestore(&devdata_mutex, flags);
+	if (!driver)
 		return -ENODEV;
-	}
-
-	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-	if (!new_devdata) {
-		dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
-		ret = -ENOMEM;
-		goto error_out;
-	}
 
-	memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
-	new_devdata->counters = old_devdata->counters;
+	ret = driver->compress(in, in_len, out, out_len, wrkmem);
 
-	/* Set ptrs for existing properties */
-	status = of_find_property(of_node, "status", NULL);
-	maxsglen = of_find_property(of_node, "ibm,max-sg-len", NULL);
-	maxsyncop = of_find_property(of_node, "ibm,max-sync-cop", NULL);
-	if (!status || !maxsglen || !maxsyncop) {
-		dev_err(old_devdata->dev, "%s: Could not locate device properties\n", __func__);
-		ret = -EINVAL;
-		goto error_out;
-	}
-
-	/*
-	 * If this is a property update, there are only certain properties that
-	 * we care about. Bail if it isn't in the below list
-	 */
-	if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
-		         strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
-		         strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
-		goto out;
-
-	/* Perform property updates */
-	ret = nx842_OF_upd_status(new_devdata, status);
-	if (ret)
-		goto error_out;
-
-	ret = nx842_OF_upd_maxsglen(new_devdata, maxsglen);
-	if (ret)
-		goto error_out;
-
-	ret = nx842_OF_upd_maxsyncop(new_devdata, maxsyncop);
-	if (ret)
-		goto error_out;
-
-out:
-	dev_info(old_devdata->dev, "%s: max_sync_size new:%u old:%u\n",
-			__func__, new_devdata->max_sync_size,
-			old_devdata->max_sync_size);
-	dev_info(old_devdata->dev, "%s: max_sync_sg new:%u old:%u\n",
-			__func__, new_devdata->max_sync_sg,
-			old_devdata->max_sync_sg);
-	dev_info(old_devdata->dev, "%s: max_sg_len new:%u old:%u\n",
-			__func__, new_devdata->max_sg_len,
-			old_devdata->max_sg_len);
-
-	rcu_assign_pointer(devdata, new_devdata);
-	spin_unlock_irqrestore(&devdata_mutex, flags);
-	synchronize_rcu();
-	dev_set_drvdata(new_devdata->dev, new_devdata);
-	kfree(old_devdata);
-	return 0;
-
-error_out:
-	if (new_devdata) {
-		dev_info(old_devdata->dev, "%s: device disabled\n", __func__);
-		nx842_OF_set_defaults(new_devdata);
-		rcu_assign_pointer(devdata, new_devdata);
-		spin_unlock_irqrestore(&devdata_mutex, flags);
-		synchronize_rcu();
-		dev_set_drvdata(new_devdata->dev, new_devdata);
-		kfree(old_devdata);
-	} else {
-		dev_err(old_devdata->dev, "%s: could not update driver from hardware\n", __func__);
-		spin_unlock_irqrestore(&devdata_mutex, flags);
-	}
+	put_driver(driver);
 
-	if (!ret)
-		ret = -EINVAL;
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nx842_compress);
 
-/**
- * nx842_OF_notifier - Process updates to OF properties for the device
- *
- * @np: notifier block
- * @action: notifier action
- * @update: struct pSeries_reconfig_prop_update pointer if action is
- *	PSERIES_UPDATE_PROPERTY
- *
- * Returns:
- *	NOTIFY_OK on success
- *	NOTIFY_BAD encoded with error number on failure, use
- *		notifier_to_errno() to decode this value
- */
-static int nx842_OF_notifier(struct notifier_block *np, unsigned long action,
-			     void *data)
-{
-	struct of_reconfig_data *upd = data;
-	struct nx842_devdata *local_devdata;
-	struct device_node *node = NULL;
-
-	rcu_read_lock();
-	local_devdata = rcu_dereference(devdata);
-	if (local_devdata)
-		node = local_devdata->dev->of_node;
-
-	if (local_devdata &&
-			action == OF_RECONFIG_UPDATE_PROPERTY &&
-			!strcmp(upd->dn->name, node->name)) {
-		rcu_read_unlock();
-		nx842_OF_upd(upd->prop);
-	} else
-		rcu_read_unlock();
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block nx842_of_nb = {
-	.notifier_call = nx842_OF_notifier,
-};
-
-#define nx842_counter_read(_name)					\
-static ssize_t nx842_##_name##_show(struct device *dev,		\
-		struct device_attribute *attr,				\
-		char *buf) {						\
-	struct nx842_devdata *local_devdata;			\
-	int p = 0;							\
-	rcu_read_lock();						\
-	local_devdata = rcu_dereference(devdata);			\
-	if (local_devdata)						\
-		p = snprintf(buf, PAGE_SIZE, "%ld\n",			\
-		       atomic64_read(&local_devdata->counters->_name));	\
-	rcu_read_unlock();						\
-	return p;							\
-}
-
-#define NX842DEV_COUNTER_ATTR_RO(_name)					\
-	nx842_counter_read(_name);					\
-	static struct device_attribute dev_attr_##_name = __ATTR(_name,	\
-						0444,			\
-						nx842_##_name##_show,\
-						NULL);
-
-NX842DEV_COUNTER_ATTR_RO(comp_complete);
-NX842DEV_COUNTER_ATTR_RO(comp_failed);
-NX842DEV_COUNTER_ATTR_RO(decomp_complete);
-NX842DEV_COUNTER_ATTR_RO(decomp_failed);
-NX842DEV_COUNTER_ATTR_RO(swdecomp);
-
-static ssize_t nx842_timehist_show(struct device *,
-		struct device_attribute *, char *);
-
-static struct device_attribute dev_attr_comp_times = __ATTR(comp_times, 0444,
-		nx842_timehist_show, NULL);
-static struct device_attribute dev_attr_decomp_times = __ATTR(decomp_times,
-		0444, nx842_timehist_show, NULL);
-
-static ssize_t nx842_timehist_show(struct device *dev,
-		struct device_attribute *attr, char *buf) {
-	char *p = buf;
-	struct nx842_devdata *local_devdata;
-	atomic64_t *times;
-	int bytes_remain = PAGE_SIZE;
-	int bytes;
-	int i;
-
-	rcu_read_lock();
-	local_devdata = rcu_dereference(devdata);
-	if (!local_devdata) {
-		rcu_read_unlock();
-		return 0;
-	}
-
-	if (attr == &dev_attr_comp_times)
-		times = local_devdata->counters->comp_times;
-	else if (attr == &dev_attr_decomp_times)
-		times = local_devdata->counters->decomp_times;
-	else {
-		rcu_read_unlock();
-		return 0;
-	}
-
-	for (i = 0; i < (NX842_HIST_SLOTS - 2); i++) {
-		bytes = snprintf(p, bytes_remain, "%u-%uus:\t%ld\n",
-			       i ? (2<<(i-1)) : 0, (2<<i)-1,
-			       atomic64_read(&times[i]));
-		bytes_remain -= bytes;
-		p += bytes;
-	}
-	/* The last bucket holds everything over
-	 * 2<<(NX842_HIST_SLOTS - 2) us */
-	bytes = snprintf(p, bytes_remain, "%uus - :\t%ld\n",
-			2<<(NX842_HIST_SLOTS - 2),
-			atomic64_read(&times[(NX842_HIST_SLOTS - 1)]));
-	p += bytes;
-
-	rcu_read_unlock();
-	return p - buf;
-}
-
-static struct attribute *nx842_sysfs_entries[] = {
-	&dev_attr_comp_complete.attr,
-	&dev_attr_comp_failed.attr,
-	&dev_attr_decomp_complete.attr,
-	&dev_attr_decomp_failed.attr,
-	&dev_attr_swdecomp.attr,
-	&dev_attr_comp_times.attr,
-	&dev_attr_decomp_times.attr,
-	NULL,
-};
-
-static struct attribute_group nx842_attribute_group = {
-	.name = NULL,		/* put in device directory */
-	.attrs = nx842_sysfs_entries,
-};
-
-static int __init nx842_probe(struct vio_dev *viodev,
-				  const struct vio_device_id *id)
+int nx842_decompress(const unsigned char *in, unsigned int in_len,
+		     unsigned char *out, unsigned int *out_len,
+		     void *wrkmem)
 {
-	struct nx842_devdata *old_devdata, *new_devdata = NULL;
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&devdata_mutex, flags);
-	old_devdata = rcu_dereference_check(devdata,
-			lockdep_is_held(&devdata_mutex));
-
-	if (old_devdata && old_devdata->vdev != NULL) {
-		dev_err(&viodev->dev, "%s: Attempt to register more than one instance of the hardware\n", __func__);
-		ret = -1;
-		goto error_unlock;
-	}
-
-	dev_set_drvdata(&viodev->dev, NULL);
-
-	new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-	if (!new_devdata) {
-		dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
-		ret = -ENOMEM;
-		goto error_unlock;
-	}
-
-	new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
-			GFP_NOFS);
-	if (!new_devdata->counters) {
-		dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
-		ret = -ENOMEM;
-		goto error_unlock;
-	}
-
-	new_devdata->vdev = viodev;
-	new_devdata->dev = &viodev->dev;
-	nx842_OF_set_defaults(new_devdata);
-
-	rcu_assign_pointer(devdata, new_devdata);
-	spin_unlock_irqrestore(&devdata_mutex, flags);
-	synchronize_rcu();
-	kfree(old_devdata);
-
-	of_reconfig_notifier_register(&nx842_of_nb);
-
-	ret = nx842_OF_upd(NULL);
-	if (ret && ret != -ENODEV) {
-		dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
-		ret = -1;
-		goto error;
-	}
+	struct nx842_driver *driver = get_driver();
+	int ret;
 
-	rcu_read_lock();
-	dev_set_drvdata(&viodev->dev, rcu_dereference(devdata));
-	rcu_read_unlock();
+	if (!driver)
+		return -ENODEV;
 
-	if (sysfs_create_group(&viodev->dev.kobj, &nx842_attribute_group)) {
-		dev_err(&viodev->dev, "could not create sysfs device attributes\n");
-		ret = -1;
-		goto error;
-	}
+	ret = driver->decompress(in, in_len, out, out_len, wrkmem);
 
-	return 0;
+	put_driver(driver);
 
-error_unlock:
-	spin_unlock_irqrestore(&devdata_mutex, flags);
-	if (new_devdata)
-		kfree(new_devdata->counters);
-	kfree(new_devdata);
-error:
 	return ret;
 }
+EXPORT_SYMBOL_GPL(nx842_decompress);
 
-static int __exit nx842_remove(struct vio_dev *viodev)
+static __init int nx842_init(void)
 {
-	struct nx842_devdata *old_devdata;
-	unsigned long flags;
-
-	pr_info("Removing IBM Power 842 compression device\n");
-	sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
-
-	spin_lock_irqsave(&devdata_mutex, flags);
-	old_devdata = rcu_dereference_check(devdata,
-			lockdep_is_held(&devdata_mutex));
-	of_reconfig_notifier_unregister(&nx842_of_nb);
-	RCU_INIT_POINTER(devdata, NULL);
-	spin_unlock_irqrestore(&devdata_mutex, flags);
-	synchronize_rcu();
-	dev_set_drvdata(&viodev->dev, NULL);
-	if (old_devdata)
-		kfree(old_devdata->counters);
-	kfree(old_devdata);
-	return 0;
-}
-
-static struct vio_device_id nx842_driver_ids[] = {
-	{"ibm,compression-v1", "ibm,compression"},
-	{"", ""},
-};
-
-static struct vio_driver nx842_driver = {
-	.name = MODULE_NAME,
-	.probe = nx842_probe,
-	.remove = __exit_p(nx842_remove),
-	.get_desired_dma = nx842_get_desired_dma,
-	.id_table = nx842_driver_ids,
-};
+	pr_info("loading\n");
 
-static int __init nx842_init(void)
-{
-	struct nx842_devdata *new_devdata;
-	pr_info("Registering IBM Power 842 compression driver\n");
+	if (of_find_compatible_node(NULL, NULL, NX842_POWERNV_COMPAT_NAME))
+		request_module_nowait(NX842_POWERNV_MODULE_NAME);
+	else if (of_find_compatible_node(NULL, NULL, NX842_PSERIES_COMPAT_NAME))
+		request_module_nowait(NX842_PSERIES_MODULE_NAME);
+	else
+		pr_err("no nx842 driver found.\n");
 
-	RCU_INIT_POINTER(devdata, NULL);
-	new_devdata = kzalloc(sizeof(*new_devdata), GFP_KERNEL);
-	if (!new_devdata) {
-		pr_err("Could not allocate memory for device data\n");
-		return -ENOMEM;
-	}
-	new_devdata->status = UNAVAILABLE;
-	RCU_INIT_POINTER(devdata, new_devdata);
+	pr_info("loaded\n");
 
-	return vio_register_driver(&nx842_driver);
+	return 0;
 }
-
 module_init(nx842_init);
 
 static void __exit nx842_exit(void)
 {
-	struct nx842_devdata *old_devdata;
-	unsigned long flags;
-
-	pr_info("Exiting IBM Power 842 compression driver\n");
-	spin_lock_irqsave(&devdata_mutex, flags);
-	old_devdata = rcu_dereference_check(devdata,
-			lockdep_is_held(&devdata_mutex));
-	RCU_INIT_POINTER(devdata, NULL);
-	spin_unlock_irqrestore(&devdata_mutex, flags);
-	synchronize_rcu();
-	if (old_devdata)
-		dev_set_drvdata(old_devdata->dev, NULL);
-	kfree(old_devdata);
-	vio_unregister_driver(&nx842_driver);
+	pr_info("NX842 unloaded\n");
 }
-
 module_exit(nx842_exit);
-
-/*********************************
- * 842 software decompressor
-*********************************/
-typedef int (*sw842_template_op)(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-
-static int sw842_data8(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-static int sw842_data4(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-static int sw842_data2(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-static int sw842_ptr8(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-static int sw842_ptr4(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-static int sw842_ptr2(const char **, int *, unsigned char **,
-						struct sw842_fifo *);
-
-/* special templates */
-#define SW842_TMPL_REPEAT 0x1B
-#define SW842_TMPL_ZEROS 0x1C
-#define SW842_TMPL_EOF 0x1E
-
-static sw842_template_op sw842_tmpl_ops[26][4] = {
-	{ sw842_data8, NULL}, /* 0 (00000) */
-	{ sw842_data4, sw842_data2, sw842_ptr2,  NULL},
-	{ sw842_data4, sw842_ptr2,  sw842_data2, NULL},
-	{ sw842_data4, sw842_ptr2,  sw842_ptr2,  NULL},
-	{ sw842_data4, sw842_ptr4,  NULL},
-	{ sw842_data2, sw842_ptr2,  sw842_data4, NULL},
-	{ sw842_data2, sw842_ptr2,  sw842_data2, sw842_ptr2},
-	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_data2},
-	{ sw842_data2, sw842_ptr2,  sw842_ptr2,  sw842_ptr2,},
-	{ sw842_data2, sw842_ptr2,  sw842_ptr4,  NULL},
-	{ sw842_ptr2,  sw842_data2, sw842_data4, NULL}, /* 10 (01010) */
-	{ sw842_ptr2,  sw842_data4, sw842_ptr2,  NULL},
-	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_data2},
-	{ sw842_ptr2,  sw842_data2, sw842_ptr2,  sw842_ptr2},
-	{ sw842_ptr2,  sw842_data2, sw842_ptr4,  NULL},
-	{ sw842_ptr2,  sw842_ptr2,  sw842_data4, NULL},
-	{ sw842_ptr2,  sw842_ptr2,  sw842_data2, sw842_ptr2},
-	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_data2},
-	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr2,  sw842_ptr2},
-	{ sw842_ptr2,  sw842_ptr2,  sw842_ptr4,  NULL},
-	{ sw842_ptr4,  sw842_data4, NULL}, /* 20 (10100) */
-	{ sw842_ptr4,  sw842_data2, sw842_ptr2,  NULL},
-	{ sw842_ptr4,  sw842_ptr2,  sw842_data2, NULL},
-	{ sw842_ptr4,  sw842_ptr2,  sw842_ptr2,  NULL},
-	{ sw842_ptr4,  sw842_ptr4,  NULL},
-	{ sw842_ptr8,  NULL}
-};
-
-/* Software decompress helpers */
-
-static uint8_t sw842_get_byte(const char *buf, int bit)
-{
-	uint8_t tmpl;
-	uint16_t tmp;
-	tmp = htons(*(uint16_t *)(buf));
-	tmp = (uint16_t)(tmp << bit);
-	tmp = ntohs(tmp);
-	memcpy(&tmpl, &tmp, 1);
-	return tmpl;
-}
-
-static uint8_t sw842_get_template(const char **buf, int *bit)
-{
-	uint8_t byte;
-	byte = sw842_get_byte(*buf, *bit);
-	byte = byte >> 3;
-	byte &= 0x1F;
-	*buf += (*bit + 5) / 8;
-	*bit = (*bit + 5) % 8;
-	return byte;
-}
-
-/* repeat_count happens to be 5-bit too (like the template) */
-static uint8_t sw842_get_repeat_count(const char **buf, int *bit)
-{
-	uint8_t byte;
-	byte = sw842_get_byte(*buf, *bit);
-	byte = byte >> 2;
-	byte &= 0x3F;
-	*buf += (*bit + 6) / 8;
-	*bit = (*bit + 6) % 8;
-	return byte;
-}
-
-static uint8_t sw842_get_ptr2(const char **buf, int *bit)
-{
-	uint8_t ptr;
-	ptr = sw842_get_byte(*buf, *bit);
-	(*buf)++;
-	return ptr;
-}
-
-static uint16_t sw842_get_ptr4(const char **buf, int *bit,
-		struct sw842_fifo *fifo)
-{
-	uint16_t ptr;
-	ptr = htons(*(uint16_t *)(*buf));
-	ptr = (uint16_t)(ptr << *bit);
-	ptr = ptr >> 7;
-	ptr &= 0x01FF;
-	*buf += (*bit + 9) / 8;
-	*bit = (*bit + 9) % 8;
-	return ptr;
-}
-
-static uint8_t sw842_get_ptr8(const char **buf, int *bit,
-		struct sw842_fifo *fifo)
-{
-	return sw842_get_ptr2(buf, bit);
-}
-
-/* Software decompress template ops */
-
-static int sw842_data8(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	int ret;
-
-	ret = sw842_data4(inbuf, inbit, outbuf, fifo);
-	if (ret)
-		return ret;
-	ret = sw842_data4(inbuf, inbit, outbuf, fifo);
-	return ret;
-}
-
-static int sw842_data4(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	int ret;
-
-	ret = sw842_data2(inbuf, inbit, outbuf, fifo);
-	if (ret)
-		return ret;
-	ret = sw842_data2(inbuf, inbit, outbuf, fifo);
-	return ret;
-}
-
-static int sw842_data2(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	**outbuf = sw842_get_byte(*inbuf, *inbit);
-	(*inbuf)++;
-	(*outbuf)++;
-	**outbuf = sw842_get_byte(*inbuf, *inbit);
-	(*inbuf)++;
-	(*outbuf)++;
-	return 0;
-}
-
-static int sw842_ptr8(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	uint8_t ptr;
-	ptr = sw842_get_ptr8(inbuf, inbit, fifo);
-	if (!fifo->f84_full && (ptr >= fifo->f8_count))
-		return 1;
-	memcpy(*outbuf, fifo->f8[ptr], 8);
-	*outbuf += 8;
-	return 0;
-}
-
-static int sw842_ptr4(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	uint16_t ptr;
-	ptr = sw842_get_ptr4(inbuf, inbit, fifo);
-	if (!fifo->f84_full && (ptr >= fifo->f4_count))
-		return 1;
-	memcpy(*outbuf, fifo->f4[ptr], 4);
-	*outbuf += 4;
-	return 0;
-}
-
-static int sw842_ptr2(const char **inbuf, int *inbit,
-		unsigned char **outbuf, struct sw842_fifo *fifo)
-{
-	uint8_t ptr;
-	ptr = sw842_get_ptr2(inbuf, inbit);
-	if (!fifo->f2_full && (ptr >= fifo->f2_count))
-		return 1;
-	memcpy(*outbuf, fifo->f2[ptr], 2);
-	*outbuf += 2;
-	return 0;
-}
-
-static void sw842_copy_to_fifo(const char *buf, struct sw842_fifo *fifo)
-{
-	unsigned char initial_f2count = fifo->f2_count;
-
-	memcpy(fifo->f8[fifo->f8_count], buf, 8);
-	fifo->f4_count += 2;
-	fifo->f8_count += 1;
-
-	if (!fifo->f84_full && fifo->f4_count >= 512) {
-		fifo->f84_full = 1;
-		fifo->f4_count /= 512;
-	}
-
-	memcpy(fifo->f2[fifo->f2_count++], buf, 2);
-	memcpy(fifo->f2[fifo->f2_count++], buf + 2, 2);
-	memcpy(fifo->f2[fifo->f2_count++], buf + 4, 2);
-	memcpy(fifo->f2[fifo->f2_count++], buf + 6, 2);
-	if (fifo->f2_count < initial_f2count)
-		fifo->f2_full = 1;
-}
-
-static int sw842_decompress(const unsigned char *src, int srclen,
-			unsigned char *dst, int *destlen,
-			const void *wrkmem)
-{
-	uint8_t tmpl;
-	const char *inbuf;
-	int inbit = 0;
-	unsigned char *outbuf, *outbuf_end, *origbuf, *prevbuf;
-	const char *inbuf_end;
-	sw842_template_op op;
-	int opindex;
-	int i, repeat_count;
-	struct sw842_fifo *fifo;
-	int ret = 0;
-
-	fifo = &((struct nx842_workmem *)(wrkmem))->swfifo;
-	memset(fifo, 0, sizeof(*fifo));
-
-	origbuf = NULL;
-	inbuf = src;
-	inbuf_end = src + srclen;
-	outbuf = dst;
-	outbuf_end = dst + *destlen;
-
-	while ((tmpl = sw842_get_template(&inbuf, &inbit)) != SW842_TMPL_EOF) {
-		if (inbuf >= inbuf_end) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		opindex = 0;
-		prevbuf = origbuf;
-		origbuf = outbuf;
-		switch (tmpl) {
-		case SW842_TMPL_REPEAT:
-			if (prevbuf == NULL) {
-				ret = -EINVAL;
-				goto out;
-			}
-
-			repeat_count = sw842_get_repeat_count(&inbuf,
-								&inbit) + 1;
-
-			/* Did the repeat count advance past the end of input */
-			if (inbuf > inbuf_end) {
-				ret = -EINVAL;
-				goto out;
-			}
-
-			for (i = 0; i < repeat_count; i++) {
-				/* Would this overflow the output buffer */
-				if ((outbuf + 8) > outbuf_end) {
-					ret = -ENOSPC;
-					goto out;
-				}
-
-				memcpy(outbuf, prevbuf, 8);
-				sw842_copy_to_fifo(outbuf, fifo);
-				outbuf += 8;
-			}
-			break;
-
-		case SW842_TMPL_ZEROS:
-			/* Would this overflow the output buffer */
-			if ((outbuf + 8) > outbuf_end) {
-				ret = -ENOSPC;
-				goto out;
-			}
-
-			memset(outbuf, 0, 8);
-			sw842_copy_to_fifo(outbuf, fifo);
-			outbuf += 8;
-			break;
-
-		default:
-			if (tmpl > 25) {
-				ret = -EINVAL;
-				goto out;
-			}
-
-			/* Does this go past the end of the input buffer */
-			if ((inbuf + 2) > inbuf_end) {
-				ret = -EINVAL;
-				goto out;
-			}
-
-			/* Would this overflow the output buffer */
-			if ((outbuf + 8) > outbuf_end) {
-				ret = -ENOSPC;
-				goto out;
-			}
-
-			while (opindex < 4 &&
-				(op = sw842_tmpl_ops[tmpl][opindex++])
-					!= NULL) {
-				ret = (*op)(&inbuf, &inbit, &outbuf, fifo);
-				if (ret) {
-					ret = -EINVAL;
-					goto out;
-				}
-				sw842_copy_to_fifo(origbuf, fifo);
-			}
-		}
-	}
-
-out:
-	if (!ret)
-		*destlen = (unsigned int)(outbuf - dst);
-	else
-		*destlen = 0;
-
-	return ret;
-}
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
new file mode 100644
index 0000000..84b15b7
--- /dev/null
+++ b/drivers/crypto/nx/nx-842.h
@@ -0,0 +1,131 @@
+
+#ifndef __NX_842_H__
+#define __NX_842_H__
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nx842.h>
+#include <linux/sw842.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/ratelimit.h>
+
+/* Restrictions on Data Descriptor List (DDL) and Entry (DDE) buffers
+ *
+ * From NX P8 workbook, sec 4.9.1 "842 details"
+ *   Each DDE buffer is 128 byte aligned
+ *   Each DDE buffer size is a multiple of 32 bytes (except the last)
+ *   The last DDE buffer size is a multiple of 8 bytes
+ */
+#define DDE_BUFFER_ALIGN	(128)
+#define DDE_BUFFER_SIZE_MULT	(32)
+#define DDE_BUFFER_LAST_MULT	(8)
+
+/* Arbitrary DDL length limit
+ * Allows max buffer size of MAX-1 to MAX pages
+ * (depending on alignment)
+ */
+#define DDL_LEN_MAX		(17)
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842		(0x00003ff8)
+#define CCW_FC_842		(0x00000007)
+
+/* CCW Function Codes (FC) for 842
+ * NX P8 workbook, section 4.9, table 4-28
+ * "Function Code Definitions for 842 Memory Compression"
+ */
+#define CCW_FC_842_COMP_NOCRC	(0)
+#define CCW_FC_842_COMP_CRC	(1)
+#define CCW_FC_842_DECOMP_NOCRC	(2)
+#define CCW_FC_842_DECOMP_CRC	(3)
+#define CCW_FC_842_MOVE		(4)
+
+/* CSB CC Error Types for 842
+ * NX P8 workbook, section 4.10.3, table 4-30
+ * "Reported Error Types Summary Table"
+ */
+/* These are all duplicates of existing codes defined in icswx.h. */
+#define CSB_CC_TRANSLATION_DUP1	(80)
+#define CSB_CC_TRANSLATION_DUP2	(82)
+#define CSB_CC_TRANSLATION_DUP3	(84)
+#define CSB_CC_TRANSLATION_DUP4	(86)
+#define CSB_CC_TRANSLATION_DUP5	(92)
+#define CSB_CC_TRANSLATION_DUP6	(94)
+#define CSB_CC_PROTECTION_DUP1	(81)
+#define CSB_CC_PROTECTION_DUP2	(83)
+#define CSB_CC_PROTECTION_DUP3	(85)
+#define CSB_CC_PROTECTION_DUP4	(87)
+#define CSB_CC_PROTECTION_DUP5	(93)
+#define CSB_CC_PROTECTION_DUP6	(95)
+#define CSB_CC_RD_EXTERNAL_DUP1	(89)
+#define CSB_CC_RD_EXTERNAL_DUP2	(90)
+#define CSB_CC_RD_EXTERNAL_DUP3	(91)
+/* These are specific to NX */
+/* 842 codes */
+#define CSB_CC_TPBC_GT_SPBC	(64) /* no error, but >1 comp ratio */
+#define CSB_CC_CRC_MISMATCH	(65) /* decomp crc mismatch */
+#define CSB_CC_TEMPL_INVALID	(66) /* decomp invalid template value */
+#define CSB_CC_TEMPL_OVERFLOW	(67) /* decomp template shows data after end */
+/* sym crypt codes */
+#define CSB_CC_DECRYPT_OVERFLOW	(64)
+/* asym crypt codes */
+#define CSB_CC_MINV_OVERFLOW	(128)
+/* These are reserved for hypervisor use */
+#define CSB_CC_HYP_RESERVE_START	(240)
+#define CSB_CC_HYP_RESERVE_END		(253)
+#define CSB_CC_HYP_NO_HW		(254)
+#define CSB_CC_HYP_HANG_ABORTED		(255)
+
+/* CCB Completion Modes (CM) for 842
+ * NX P8 workbook, section 4.3, figure 4-5
+ * "CRB Details - Normal Cop_Req (CL=00, C=1)"
+ */
+#define CCB_CM_EXTRA_WRITE	(CCB_CM0_ALL_COMPLETIONS & CCB_CM12_STORE)
+#define CCB_CM_INTERRUPT	(CCB_CM0_ALL_COMPLETIONS & CCB_CM12_INTERRUPT)
+
+#define LEN_ON_PAGE(pa)		(PAGE_SIZE - ((pa) & ~PAGE_MASK))
+
+static inline unsigned long nx842_get_pa(void *addr)
+{
+	if (!is_vmalloc_addr(addr))
+		return __pa(addr);
+
+	return page_to_phys(vmalloc_to_page(addr)) + offset_in_page(addr);
+}
+
+/* Get/Set bit fields */
+#define MASK_LSH(m)		(__builtin_ffsl(m) - 1)
+#define GET_FIELD(v, m)		(((v) & (m)) >> MASK_LSH(m))
+#define SET_FIELD(v, m, val)	(((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m)))
+
+struct nx842_driver {
+	struct module *owner;
+
+	struct nx842_constraints *constraints;
+
+	int (*compress)(const unsigned char *in, unsigned int in_len,
+			unsigned char *out, unsigned int *out_len,
+			void *wrkmem);
+	int (*decompress)(const unsigned char *in, unsigned int in_len,
+			  unsigned char *out, unsigned int *out_len,
+			  void *wrkmem);
+};
+
+void nx842_register_driver(struct nx842_driver *driver);
+void nx842_unregister_driver(struct nx842_driver *driver);
+
+
+/* To allow the main nx-compress module to load platform module */
+#define NX842_POWERNV_MODULE_NAME	"nx-compress-powernv"
+#define NX842_POWERNV_COMPAT_NAME	"ibm,power-nx"
+#define NX842_PSERIES_MODULE_NAME	"nx-compress-pseries"
+#define NX842_PSERIES_COMPAT_NAME	"ibm,compression"
+
+
+#endif /* __NX_842_H__ */
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index 88c5624..e4e64f6 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -96,9 +96,6 @@ out:
 static int gcm_aes_nx_setauthsize(struct crypto_aead *tfm,
 				  unsigned int authsize)
 {
-	if (authsize > crypto_aead_alg(tfm)->maxauthsize)
-		return -EINVAL;
-
 	crypto_aead_crt(tfm)->authsize = authsize;
 
 	return 0;
diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c
index 23621da..4e91bdb 100644
--- a/drivers/crypto/nx/nx-sha256.c
+++ b/drivers/crypto/nx/nx-sha256.c
@@ -33,8 +33,9 @@ static int nx_sha256_init(struct shash_desc *desc)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
+	struct nx_sg *out_sg;
 	int len;
-	int rc;
+	u32 max_sg_len;
 
 	nx_ctx_init(nx_ctx, HCOP_FC_SHA);
 
@@ -44,15 +45,18 @@ static int nx_sha256_init(struct shash_desc *desc)
 
 	NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA256);
 
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	len = SHA256_DIGEST_SIZE;
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg,
-				  &nx_ctx->op.outlen,
-				  &len,
-				  (u8 *) sctx->state,
-				  NX_DS_SHA256);
+	out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state,
+				  &len, max_sg_len);
+	nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
 
-	if (rc)
-		goto out;
+	if (len != SHA256_DIGEST_SIZE)
+		return -EINVAL;
 
 	sctx->state[0] = __cpu_to_be32(SHA256_H0);
 	sctx->state[1] = __cpu_to_be32(SHA256_H1);
@@ -64,7 +68,6 @@ static int nx_sha256_init(struct shash_desc *desc)
 	sctx->state[7] = __cpu_to_be32(SHA256_H7);
 	sctx->count = 0;
 
-out:
 	return 0;
 }
 
@@ -74,10 +77,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+	struct nx_sg *in_sg;
 	u64 to_process = 0, leftover, total;
 	unsigned long irq_flags;
 	int rc = 0;
 	int data_len;
+	u32 max_sg_len;
 	u64 buf_len = (sctx->count % SHA256_BLOCK_SIZE);
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
@@ -97,6 +102,12 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 	NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
 	NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
+	in_sg = nx_ctx->in_sg;
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	do {
 		/*
 		 * to_process: the SHA256_BLOCK_SIZE data chunk to process in
@@ -108,25 +119,22 @@ static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 
 		if (buf_len) {
 			data_len = buf_len;
-			rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-						  &nx_ctx->op.inlen,
-						  &data_len,
-						  (u8 *) sctx->buf,
-						  NX_DS_SHA256);
+			in_sg = nx_build_sg_list(nx_ctx->in_sg,
+						 (u8 *) sctx->buf,
+						 &data_len,
+						 max_sg_len);
 
-			if (rc || data_len != buf_len)
+			if (data_len != buf_len) {
+				rc = -EINVAL;
 				goto out;
+			}
 		}
 
 		data_len = to_process - buf_len;
-		rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-					  &nx_ctx->op.inlen,
-					  &data_len,
-					  (u8 *) data,
-					  NX_DS_SHA256);
+		in_sg = nx_build_sg_list(in_sg, (u8 *) data,
+					 &data_len, max_sg_len);
 
-		if (rc)
-			goto out;
+		nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
 
 		to_process = (data_len + buf_len);
 		leftover = total - to_process;
@@ -173,12 +181,19 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out)
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+	struct nx_sg *in_sg, *out_sg;
 	unsigned long irq_flags;
-	int rc;
+	u32 max_sg_len;
+	int rc = 0;
 	int len;
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	/* final is represented by continuing the operation and indicating that
 	 * this is not an intermediate operation */
 	if (sctx->count >= SHA256_BLOCK_SIZE) {
@@ -195,25 +210,24 @@ static int nx_sha256_final(struct shash_desc *desc, u8 *out)
 	csbcpb->cpb.sha256.message_bit_length = (u64) (sctx->count * 8);
 
 	len = sctx->count & (SHA256_BLOCK_SIZE - 1);
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-				  &nx_ctx->op.inlen,
-				  &len,
-				  (u8 *) sctx->buf,
-				  NX_DS_SHA256);
+	in_sg = nx_build_sg_list(nx_ctx->in_sg, (u8 *) sctx->buf,
+				 &len, max_sg_len);
 
-	if (rc || len != (sctx->count & (SHA256_BLOCK_SIZE - 1)))
+	if (len != (sctx->count & (SHA256_BLOCK_SIZE - 1))) {
+		rc = -EINVAL;
 		goto out;
+	}
 
 	len = SHA256_DIGEST_SIZE;
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg,
-				  &nx_ctx->op.outlen,
-				  &len,
-				  out,
-				  NX_DS_SHA256);
+	out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len, max_sg_len);
 
-	if (rc || len != SHA256_DIGEST_SIZE)
+	if (len != SHA256_DIGEST_SIZE) {
+		rc = -EINVAL;
 		goto out;
+	}
 
+	nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
+	nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
 	if (!nx_ctx->op.outlen) {
 		rc = -EINVAL;
 		goto out;
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index b3adf10..e6a58d2 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -32,8 +32,9 @@ static int nx_sha512_init(struct shash_desc *desc)
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
+	struct nx_sg *out_sg;
 	int len;
-	int rc;
+	u32 max_sg_len;
 
 	nx_ctx_init(nx_ctx, HCOP_FC_SHA);
 
@@ -43,15 +44,18 @@ static int nx_sha512_init(struct shash_desc *desc)
 
 	NX_CPB_SET_DIGEST_SIZE(nx_ctx->csbcpb, NX_DS_SHA512);
 
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	len = SHA512_DIGEST_SIZE;
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg,
-				  &nx_ctx->op.outlen,
-				  &len,
-				  (u8 *)sctx->state,
-				  NX_DS_SHA512);
+	out_sg = nx_build_sg_list(nx_ctx->out_sg, (u8 *)sctx->state,
+				  &len, max_sg_len);
+	nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
 
-	if (rc || len != SHA512_DIGEST_SIZE)
-		goto out;
+	if (len != SHA512_DIGEST_SIZE)
+		return -EINVAL;
 
 	sctx->state[0] = __cpu_to_be64(SHA512_H0);
 	sctx->state[1] = __cpu_to_be64(SHA512_H1);
@@ -63,7 +67,6 @@ static int nx_sha512_init(struct shash_desc *desc)
 	sctx->state[7] = __cpu_to_be64(SHA512_H7);
 	sctx->count[0] = 0;
 
-out:
 	return 0;
 }
 
@@ -73,10 +76,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+	struct nx_sg *in_sg;
 	u64 to_process, leftover = 0, total;
 	unsigned long irq_flags;
 	int rc = 0;
 	int data_len;
+	u32 max_sg_len;
 	u64 buf_len = (sctx->count[0] % SHA512_BLOCK_SIZE);
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
@@ -96,6 +101,12 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 	NX_CPB_FDM(csbcpb) |= NX_FDM_INTERMEDIATE;
 	NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
+	in_sg = nx_ctx->in_sg;
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	do {
 		/*
 		 * to_process: the SHA512_BLOCK_SIZE data chunk to process in
@@ -108,25 +119,26 @@ static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 
 		if (buf_len) {
 			data_len = buf_len;
-			rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-						  &nx_ctx->op.inlen,
-						  &data_len,
-						  (u8 *) sctx->buf,
-						  NX_DS_SHA512);
+			in_sg = nx_build_sg_list(nx_ctx->in_sg,
+						 (u8 *) sctx->buf,
+						 &data_len, max_sg_len);
 
-			if (rc || data_len != buf_len)
+			if (data_len != buf_len) {
+				rc = -EINVAL;
 				goto out;
+			}
 		}
 
 		data_len = to_process - buf_len;
-		rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-					  &nx_ctx->op.inlen,
-					  &data_len,
-					  (u8 *) data,
-					  NX_DS_SHA512);
+		in_sg = nx_build_sg_list(in_sg, (u8 *) data,
+					 &data_len, max_sg_len);
 
-		if (rc || data_len != (to_process - buf_len))
+		nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
+
+		if (data_len != (to_process - buf_len)) {
+			rc = -EINVAL;
 			goto out;
+		}
 
 		to_process = (data_len + buf_len);
 		leftover = total - to_process;
@@ -172,13 +184,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out)
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
+	struct nx_sg *in_sg, *out_sg;
+	u32 max_sg_len;
 	u64 count0;
 	unsigned long irq_flags;
-	int rc;
+	int rc = 0;
 	int len;
 
 	spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
+	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
+			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
+	max_sg_len = min_t(u64, max_sg_len,
+			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
+
 	/* final is represented by continuing the operation and indicating that
 	 * this is not an intermediate operation */
 	if (sctx->count[0] >= SHA512_BLOCK_SIZE) {
@@ -200,24 +219,20 @@ static int nx_sha512_final(struct shash_desc *desc, u8 *out)
 	csbcpb->cpb.sha512.message_bit_length_lo = count0;
 
 	len = sctx->count[0] & (SHA512_BLOCK_SIZE - 1);
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->in_sg,
-				  &nx_ctx->op.inlen,
-				  &len,
-				  (u8 *)sctx->buf,
-				  NX_DS_SHA512);
+	in_sg = nx_build_sg_list(nx_ctx->in_sg, sctx->buf, &len,
+				 max_sg_len);
 
-	if (rc || len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1)))
+	if (len != (sctx->count[0] & (SHA512_BLOCK_SIZE - 1))) {
+		rc = -EINVAL;
 		goto out;
+	}
 
 	len = SHA512_DIGEST_SIZE;
-	rc = nx_sha_build_sg_list(nx_ctx, nx_ctx->out_sg,
-				  &nx_ctx->op.outlen,
-				  &len,
-				  out,
-				  NX_DS_SHA512);
+	out_sg = nx_build_sg_list(nx_ctx->out_sg, out, &len,
+				 max_sg_len);
 
-	if (rc)
-		goto out;
+	nx_ctx->op.inlen = (nx_ctx->in_sg - in_sg) * sizeof(struct nx_sg);
+	nx_ctx->op.outlen = (nx_ctx->out_sg - out_sg) * sizeof(struct nx_sg);
 
 	if (!nx_ctx->op.outlen) {
 		rc = -EINVAL;
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 1da6dc5..2e2529c 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -215,8 +215,15 @@ struct nx_sg *nx_walk_and_build(struct nx_sg       *nx_dst,
  * @delta:  is the amount we need to crop in order to bound the list.
  *
  */
-static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int delta)
+static long int trim_sg_list(struct nx_sg *sg,
+			     struct nx_sg *end,
+			     unsigned int delta,
+			     unsigned int *nbytes)
 {
+	long int oplen;
+	long int data_back;
+	unsigned int is_delta = delta;
+
 	while (delta && end > sg) {
 		struct nx_sg *last = end - 1;
 
@@ -228,54 +235,20 @@ static long int trim_sg_list(struct nx_sg *sg, struct nx_sg *end, unsigned int d
 			delta -= last->len;
 		}
 	}
-	return (sg - end) * sizeof(struct nx_sg);
-}
-
-/**
- * nx_sha_build_sg_list - walk and build sg list to sha modes
- *			  using right bounds and limits.
- * @nx_ctx: NX crypto context for the lists we're building
- * @nx_sg: current sg list in or out list
- * @op_len: current op_len to be used in order to build a sg list
- * @nbytes:  number or bytes to be processed
- * @offset: buf offset
- * @mode: SHA256 or SHA512
- */
-int nx_sha_build_sg_list(struct nx_crypto_ctx *nx_ctx,
-			  struct nx_sg 	      *nx_in_outsg,
-			  s64		      *op_len,
-			  unsigned int        *nbytes,
-			  u8 		      *offset,
-			  u32		      mode)
-{
-	unsigned int delta = 0;
-	unsigned int total = *nbytes;
-	struct nx_sg *nx_insg = nx_in_outsg;
-	unsigned int max_sg_len;
 
-	max_sg_len = min_t(u64, nx_ctx->ap->sglen,
-			nx_driver.of.max_sg_len/sizeof(struct nx_sg));
-	max_sg_len = min_t(u64, max_sg_len,
-			nx_ctx->ap->databytelen/NX_PAGE_SIZE);
-
-	*nbytes = min_t(u64, *nbytes, nx_ctx->ap->databytelen);
-	nx_insg = nx_build_sg_list(nx_insg, offset, nbytes, max_sg_len);
-
-	switch (mode) {
-	case NX_DS_SHA256:
-		if (*nbytes < total)
-			delta = *nbytes - (*nbytes & ~(SHA256_BLOCK_SIZE - 1));
-		break;
-	case NX_DS_SHA512:
-		if (*nbytes < total)
-			delta = *nbytes - (*nbytes & ~(SHA512_BLOCK_SIZE - 1));
-		break;
-	default:
-		return -EINVAL;
+	/* There are cases where we need to crop list in order to make it
+	 * a block size multiple, but we also need to align data. In order to
+	 * that we need to calculate how much we need to put back to be
+	 * processed
+	 */
+	oplen = (sg - end) * sizeof(struct nx_sg);
+	if (is_delta) {
+		data_back = (abs(oplen) / AES_BLOCK_SIZE) *  sg->len;
+		data_back = *nbytes - (data_back & ~(AES_BLOCK_SIZE - 1));
+		*nbytes -= data_back;
 	}
-	*op_len = trim_sg_list(nx_in_outsg, nx_insg, delta);
 
-	return 0;
+	return oplen;
 }
 
 /**
@@ -330,8 +303,8 @@ int nx_build_sg_lists(struct nx_crypto_ctx  *nx_ctx,
 	/* these lengths should be negative, which will indicate to phyp that
 	 * the input and output parameters are scatterlists, not linear
 	 * buffers */
-	nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta);
-	nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta);
+	nx_ctx->op.inlen = trim_sg_list(nx_ctx->in_sg, nx_insg, delta, nbytes);
+	nx_ctx->op.outlen = trim_sg_list(nx_ctx->out_sg, nx_outsg, delta, nbytes);
 
 	return 0;
 }
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index 6c9ecaa..41b87ee 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -153,8 +153,6 @@ void nx_crypto_ctx_exit(struct crypto_tfm *tfm);
 void nx_ctx_init(struct nx_crypto_ctx *nx_ctx, unsigned int function);
 int nx_hcall_sync(struct nx_crypto_ctx *ctx, struct vio_pfo_op *op,
 		  u32 may_sleep);
-int nx_sha_build_sg_list(struct nx_crypto_ctx *, struct nx_sg *,
-			 s64 *, unsigned int *, u8 *, u32);
 struct nx_sg *nx_build_sg_list(struct nx_sg *, u8 *, unsigned int *, u32);
 int nx_build_sg_lists(struct nx_crypto_ctx *, struct blkcipher_desc *,
 		      struct scatterlist *, struct scatterlist *, unsigned int *,
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 4d63e0d..b2024c95 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -362,7 +362,13 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
 
 static int omap_sham_hw_init(struct omap_sham_dev *dd)
 {
-	pm_runtime_get_sync(dd->dev);
+	int err;
+
+	err = pm_runtime_get_sync(dd->dev);
+	if (err < 0) {
+		dev_err(dd->dev, "failed to get sync: %d\n", err);
+		return err;
+	}
 
 	if (!test_bit(FLAGS_INIT, &dd->flags)) {
 		set_bit(FLAGS_INIT, &dd->flags);
@@ -1793,6 +1799,10 @@ static const struct of_device_id omap_sham_of_match[] = {
 		.data		= &omap_sham_pdata_omap2,
 	},
 	{
+		.compatible	= "ti,omap3-sham",
+		.data		= &omap_sham_pdata_omap2,
+	},
+	{
 		.compatible	= "ti,omap4-sham",
 		.data		= &omap_sham_pdata_omap4,
 	},
@@ -1947,7 +1957,13 @@ static int omap_sham_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 	pm_runtime_irq_safe(dev);
-	pm_runtime_get_sync(dev);
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get sync: %d\n", err);
+		goto err_pm;
+	}
+
 	rev = omap_sham_read(dd, SHA_REG_REV(dd));
 	pm_runtime_put_sync(&pdev->dev);
 
@@ -1977,6 +1993,7 @@ err_algs:
 		for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
 			crypto_unregister_ahash(
 					&dd->pdata->algs_info[i].algs_list[j]);
+err_pm:
 	pm_runtime_disable(dev);
 	if (dd->dma_lch)
 		dma_release_channel(dd->dma_lch);
@@ -2019,7 +2036,11 @@ static int omap_sham_suspend(struct device *dev)
 
 static int omap_sham_resume(struct device *dev)
 {
-	pm_runtime_get_sync(dev);
+	int err = pm_runtime_get_sync(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get sync: %d\n", err);
+		return err;
+	}
 	return 0;
 }
 #endif
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 5da5b98..eb2a0ca 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -15,7 +15,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
@@ -790,7 +790,8 @@ static int spacc_aead_cra_init(struct crypto_tfm *tfm)
 
 	get_random_bytes(ctx->salt, sizeof(ctx->salt));
 
-	tfm->crt_aead.reqsize = sizeof(struct spacc_req);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+				sizeof(struct spacc_req));
 
 	return 0;
 }
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index f22ce71..5fe9029 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -48,7 +48,6 @@
 #define ADF_ACCEL_DEVICES_H_
 #include <linux/module.h>
 #include <linux/list.h>
-#include <linux/proc_fs.h>
 #include <linux/io.h>
 #include "adf_cfg_common.h"
 
diff --git a/drivers/crypto/qat/qat_common/adf_cfg_user.h b/drivers/crypto/qat/qat_common/adf_cfg_user.h
index 0c38a15..ef5988a 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg_user.h
+++ b/drivers/crypto/qat/qat_common/adf_cfg_user.h
@@ -54,14 +54,6 @@ struct adf_user_cfg_key_val {
 	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
 	char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
 	union {
-		char *user_val_ptr;
-		uint64_t padding1;
-	};
-	union {
-		struct adf_user_cfg_key_val *prev;
-		uint64_t padding2;
-	};
-	union {
 		struct adf_user_cfg_key_val *next;
 		uint64_t padding3;
 	};
@@ -75,10 +67,6 @@ struct adf_user_cfg_section {
 		uint64_t padding1;
 	};
 	union {
-		struct adf_user_cfg_section *prev;
-		uint64_t padding2;
-	};
-	union {
 		struct adf_user_cfg_section *next;
 		uint64_t padding3;
 	};
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 0666ee6..27e16c0 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -53,6 +53,13 @@
 #include "icp_qat_fw_loader_handle.h"
 #include "icp_qat_hal.h"
 
+#define ADF_MAJOR_VERSION	0
+#define ADF_MINOR_VERSION	1
+#define ADF_BUILD_VERSION	3
+#define ADF_DRV_VERSION		__stringify(ADF_MAJOR_VERSION) "." \
+				__stringify(ADF_MINOR_VERSION) "." \
+				__stringify(ADF_BUILD_VERSION)
+
 #define ADF_STATUS_RESTARTING 0
 #define ADF_STATUS_STARTING 1
 #define ADF_STATUS_CONFIGURED 2
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index cb5f066..e056b9e 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -504,3 +504,4 @@ MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel");
 MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
 MODULE_ALIAS_CRYPTO("intel_qat");
+MODULE_VERSION(ADF_DRV_VERSION);
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 1dc5b0a..dc231b8 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -47,7 +47,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/crypto.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
@@ -1094,8 +1094,9 @@ static int qat_alg_aead_init(struct crypto_tfm *tfm,
 		return -EFAULT;
 	spin_lock_init(&ctx->lock);
 	ctx->qat_hash_alg = hash;
-	tfm->crt_aead.reqsize = sizeof(struct aead_request) +
-				sizeof(struct qat_crypto_request);
+	crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+		sizeof(struct aead_request) +
+		sizeof(struct qat_crypto_request));
 	ctx->tfm = tfm;
 	return 0;
 }
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index 9decea2..ecf0ef1 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -417,5 +417,6 @@ module_exit(adfdrv_release);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel");
-MODULE_FIRMWARE("qat_895xcc.bin");
+MODULE_FIRMWARE(ADF_DH895XCC_FW);
 MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
+MODULE_VERSION(ADF_DRV_VERSION);
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 857414a..83aca95 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -46,7 +46,7 @@
 #include <crypto/des.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
-#include <crypto/aead.h>
+#include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
 #include <crypto/skcipher.h>
 #include <crypto/hash.h>
@@ -55,49 +55,92 @@
 
 #include "talitos.h"
 
-static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr)
+static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
+			   bool is_sec1)
 {
-	talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
-	talitos_ptr->eptr = upper_32_bits(dma_addr);
+	ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
+	if (!is_sec1)
+		ptr->eptr = upper_32_bits(dma_addr);
+}
+
+static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len,
+			       bool is_sec1)
+{
+	if (is_sec1) {
+		ptr->res = 0;
+		ptr->len1 = cpu_to_be16(len);
+	} else {
+		ptr->len = cpu_to_be16(len);
+	}
+}
+
+static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr,
+					   bool is_sec1)
+{
+	if (is_sec1)
+		return be16_to_cpu(ptr->len1);
+	else
+		return be16_to_cpu(ptr->len);
+}
+
+static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1)
+{
+	if (!is_sec1)
+		ptr->j_extent = 0;
 }
 
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
 static void map_single_talitos_ptr(struct device *dev,
-				   struct talitos_ptr *talitos_ptr,
-				   unsigned short len, void *data,
-				   unsigned char extent,
+				   struct talitos_ptr *ptr,
+				   unsigned int len, void *data,
 				   enum dma_data_direction dir)
 {
 	dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
 
-	talitos_ptr->len = cpu_to_be16(len);
-	to_talitos_ptr(talitos_ptr, dma_addr);
-	talitos_ptr->j_extent = extent;
+	to_talitos_ptr_len(ptr, len, is_sec1);
+	to_talitos_ptr(ptr, dma_addr, is_sec1);
+	to_talitos_ptr_extent_clear(ptr, is_sec1);
 }
 
 /*
  * unmap bus single (contiguous) h/w descriptor pointer
  */
 static void unmap_single_talitos_ptr(struct device *dev,
-				     struct talitos_ptr *talitos_ptr,
+				     struct talitos_ptr *ptr,
 				     enum dma_data_direction dir)
 {
-	dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr),
-			 be16_to_cpu(talitos_ptr->len), dir);
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+
+	dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
+			 from_talitos_ptr_len(ptr, is_sec1), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	unsigned int timeout = TALITOS_TIMEOUT;
+	bool is_sec1 = has_ftr_sec1(priv);
 
-	setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET);
+	if (is_sec1) {
+		setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
+			  TALITOS1_CCCR_LO_RESET);
 
-	while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET)
-	       && --timeout)
-		cpu_relax();
+		while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) &
+			TALITOS1_CCCR_LO_RESET) && --timeout)
+			cpu_relax();
+	} else {
+		setbits32(priv->chan[ch].reg + TALITOS_CCCR,
+			  TALITOS2_CCCR_RESET);
+
+		while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
+			TALITOS2_CCCR_RESET) && --timeout)
+			cpu_relax();
+	}
 
 	if (timeout == 0) {
 		dev_err(dev, "failed to reset channel %d\n", ch);
@@ -120,11 +163,12 @@ static int reset_device(struct device *dev)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	unsigned int timeout = TALITOS_TIMEOUT;
-	u32 mcr = TALITOS_MCR_SWR;
+	bool is_sec1 = has_ftr_sec1(priv);
+	u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR;
 
 	setbits32(priv->reg + TALITOS_MCR, mcr);
 
-	while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR)
+	while ((in_be32(priv->reg + TALITOS_MCR) & mcr)
 	       && --timeout)
 		cpu_relax();
 
@@ -148,6 +192,7 @@ static int init_device(struct device *dev)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	int ch, err;
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	/*
 	 * Master reset
@@ -171,12 +216,19 @@ static int init_device(struct device *dev)
 	}
 
 	/* enable channel done and error interrupts */
-	setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT);
-	setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT);
+	if (is_sec1) {
+		clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT);
+		clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT);
+		/* disable parity error check in DEU (erroneous? test vect.) */
+		setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE);
+	} else {
+		setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT);
+		setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT);
+	}
 
 	/* disable integrity check error interrupts (use writeback instead) */
 	if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
-		setbits32(priv->reg + TALITOS_MDEUICR_LO,
+		setbits32(priv->reg_mdeu + TALITOS_EUICR_LO,
 		          TALITOS_MDEUICR_LO_ICE);
 
 	return 0;
@@ -204,6 +256,7 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 	struct talitos_request *request;
 	unsigned long flags;
 	int head;
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
 
@@ -217,8 +270,17 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 	request = &priv->chan[ch].fifo[head];
 
 	/* map descriptor and save caller data */
-	request->dma_desc = dma_map_single(dev, desc, sizeof(*desc),
-					   DMA_BIDIRECTIONAL);
+	if (is_sec1) {
+		desc->hdr1 = desc->hdr;
+		desc->next_desc = 0;
+		request->dma_desc = dma_map_single(dev, &desc->hdr1,
+						   TALITOS_DESC_SIZE,
+						   DMA_BIDIRECTIONAL);
+	} else {
+		request->dma_desc = dma_map_single(dev, desc,
+						   TALITOS_DESC_SIZE,
+						   DMA_BIDIRECTIONAL);
+	}
 	request->callback = callback;
 	request->context = context;
 
@@ -250,16 +312,21 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
 	struct talitos_request *request, saved_req;
 	unsigned long flags;
 	int tail, status;
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	spin_lock_irqsave(&priv->chan[ch].tail_lock, flags);
 
 	tail = priv->chan[ch].tail;
 	while (priv->chan[ch].fifo[tail].desc) {
+		__be32 hdr;
+
 		request = &priv->chan[ch].fifo[tail];
 
 		/* descriptors with their done bits set don't get the error */
 		rmb();
-		if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
+		hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr;
+
+		if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
 			status = 0;
 		else
 			if (!error)
@@ -268,7 +335,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
 				status = error;
 
 		dma_unmap_single(dev, request->dma_desc,
-				 sizeof(struct talitos_desc),
+				 TALITOS_DESC_SIZE,
 				 DMA_BIDIRECTIONAL);
 
 		/* copy entries so we can call callback outside lock */
@@ -302,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch)
 /*
  * process completed requests for channels that have done status
  */
-#define DEF_TALITOS_DONE(name, ch_done_mask)				\
-static void talitos_done_##name(unsigned long data)			\
+#define DEF_TALITOS1_DONE(name, ch_done_mask)				\
+static void talitos1_done_##name(unsigned long data)			\
+{									\
+	struct device *dev = (struct device *)data;			\
+	struct talitos_private *priv = dev_get_drvdata(dev);		\
+	unsigned long flags;						\
+									\
+	if (ch_done_mask & 0x10000000)					\
+		flush_channel(dev, 0, 0, 0);			\
+	if (priv->num_channels == 1)					\
+		goto out;						\
+	if (ch_done_mask & 0x40000000)					\
+		flush_channel(dev, 1, 0, 0);			\
+	if (ch_done_mask & 0x00010000)					\
+		flush_channel(dev, 2, 0, 0);			\
+	if (ch_done_mask & 0x00040000)					\
+		flush_channel(dev, 3, 0, 0);			\
+									\
+out:									\
+	/* At this point, all completed channels have been processed */	\
+	/* Unmask done interrupts for channels completed later on. */	\
+	spin_lock_irqsave(&priv->reg_lock, flags);			\
+	clrbits32(priv->reg + TALITOS_IMR, ch_done_mask);		\
+	clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT);	\
+	spin_unlock_irqrestore(&priv->reg_lock, flags);			\
+}
+
+DEF_TALITOS1_DONE(4ch, TALITOS1_ISR_4CHDONE)
+
+#define DEF_TALITOS2_DONE(name, ch_done_mask)				\
+static void talitos2_done_##name(unsigned long data)			\
 {									\
 	struct device *dev = (struct device *)data;			\
 	struct talitos_private *priv = dev_get_drvdata(dev);		\
@@ -325,12 +421,13 @@ out:									\
 	/* Unmask done interrupts for channels completed later on. */	\
 	spin_lock_irqsave(&priv->reg_lock, flags);			\
 	setbits32(priv->reg + TALITOS_IMR, ch_done_mask);		\
-	setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT);	\
+	setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT);	\
 	spin_unlock_irqrestore(&priv->reg_lock, flags);			\
 }
-DEF_TALITOS_DONE(4ch, TALITOS_ISR_4CHDONE)
-DEF_TALITOS_DONE(ch0_2, TALITOS_ISR_CH_0_2_DONE)
-DEF_TALITOS_DONE(ch1_3, TALITOS_ISR_CH_1_3_DONE)
+
+DEF_TALITOS2_DONE(4ch, TALITOS2_ISR_4CHDONE)
+DEF_TALITOS2_DONE(ch0_2, TALITOS2_ISR_CH_0_2_DONE)
+DEF_TALITOS2_DONE(ch1_3, TALITOS2_ISR_CH_1_3_DONE)
 
 /*
  * locate current (offending) descriptor
@@ -377,44 +474,44 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr)
 	switch (desc_hdr & DESC_HDR_SEL0_MASK) {
 	case DESC_HDR_SEL0_AFEU:
 		dev_err(dev, "AFEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_AFEUISR),
-			in_be32(priv->reg + TALITOS_AFEUISR_LO));
+			in_be32(priv->reg_afeu + TALITOS_EUISR),
+			in_be32(priv->reg_afeu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_DEU:
 		dev_err(dev, "DEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_DEUISR),
-			in_be32(priv->reg + TALITOS_DEUISR_LO));
+			in_be32(priv->reg_deu + TALITOS_EUISR),
+			in_be32(priv->reg_deu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_MDEUA:
 	case DESC_HDR_SEL0_MDEUB:
 		dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_MDEUISR),
-			in_be32(priv->reg + TALITOS_MDEUISR_LO));
+			in_be32(priv->reg_mdeu + TALITOS_EUISR),
+			in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_RNG:
 		dev_err(dev, "RNGUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_RNGUISR),
-			in_be32(priv->reg + TALITOS_RNGUISR_LO));
+			in_be32(priv->reg_rngu + TALITOS_ISR),
+			in_be32(priv->reg_rngu + TALITOS_ISR_LO));
 		break;
 	case DESC_HDR_SEL0_PKEU:
 		dev_err(dev, "PKEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_PKEUISR),
-			in_be32(priv->reg + TALITOS_PKEUISR_LO));
+			in_be32(priv->reg_pkeu + TALITOS_EUISR),
+			in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_AESU:
 		dev_err(dev, "AESUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_AESUISR),
-			in_be32(priv->reg + TALITOS_AESUISR_LO));
+			in_be32(priv->reg_aesu + TALITOS_EUISR),
+			in_be32(priv->reg_aesu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_CRCU:
 		dev_err(dev, "CRCUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_CRCUISR),
-			in_be32(priv->reg + TALITOS_CRCUISR_LO));
+			in_be32(priv->reg_crcu + TALITOS_EUISR),
+			in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL0_KEU:
 		dev_err(dev, "KEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_KEUISR),
-			in_be32(priv->reg + TALITOS_KEUISR_LO));
+			in_be32(priv->reg_pkeu + TALITOS_EUISR),
+			in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
 		break;
 	}
 
@@ -422,13 +519,13 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr)
 	case DESC_HDR_SEL1_MDEUA:
 	case DESC_HDR_SEL1_MDEUB:
 		dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_MDEUISR),
-			in_be32(priv->reg + TALITOS_MDEUISR_LO));
+			in_be32(priv->reg_mdeu + TALITOS_EUISR),
+			in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
 		break;
 	case DESC_HDR_SEL1_CRCU:
 		dev_err(dev, "CRCUISR 0x%08x_%08x\n",
-			in_be32(priv->reg + TALITOS_CRCUISR),
-			in_be32(priv->reg + TALITOS_CRCUISR_LO));
+			in_be32(priv->reg_crcu + TALITOS_EUISR),
+			in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
 		break;
 	}
 
@@ -445,17 +542,24 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
 {
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	unsigned int timeout = TALITOS_TIMEOUT;
-	int ch, error, reset_dev = 0, reset_ch = 0;
-	u32 v, v_lo;
+	int ch, error, reset_dev = 0;
+	u32 v_lo;
+	bool is_sec1 = has_ftr_sec1(priv);
+	int reset_ch = is_sec1 ? 1 : 0; /* only SEC2 supports continuation */
 
 	for (ch = 0; ch < priv->num_channels; ch++) {
 		/* skip channels without errors */
-		if (!(isr & (1 << (ch * 2 + 1))))
-			continue;
+		if (is_sec1) {
+			/* bits 29, 31, 17, 19 */
+			if (!(isr & (1 << (29 + (ch & 1) * 2 - (ch & 2) * 6))))
+				continue;
+		} else {
+			if (!(isr & (1 << (ch * 2 + 1))))
+				continue;
+		}
 
 		error = -EINVAL;
 
-		v = in_be32(priv->chan[ch].reg + TALITOS_CCPSR);
 		v_lo = in_be32(priv->chan[ch].reg + TALITOS_CCPSR_LO);
 
 		if (v_lo & TALITOS_CCPSR_LO_DOF) {
@@ -471,23 +575,28 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
 		if (v_lo & TALITOS_CCPSR_LO_MDTE)
 			dev_err(dev, "master data transfer error\n");
 		if (v_lo & TALITOS_CCPSR_LO_SGDLZ)
-			dev_err(dev, "s/g data length zero error\n");
+			dev_err(dev, is_sec1 ? "pointeur not complete error\n"
+					     : "s/g data length zero error\n");
 		if (v_lo & TALITOS_CCPSR_LO_FPZ)
-			dev_err(dev, "fetch pointer zero error\n");
+			dev_err(dev, is_sec1 ? "parity error\n"
+					     : "fetch pointer zero error\n");
 		if (v_lo & TALITOS_CCPSR_LO_IDH)
 			dev_err(dev, "illegal descriptor header error\n");
 		if (v_lo & TALITOS_CCPSR_LO_IEU)
-			dev_err(dev, "invalid execution unit error\n");
+			dev_err(dev, is_sec1 ? "static assignment error\n"
+					     : "invalid exec unit error\n");
 		if (v_lo & TALITOS_CCPSR_LO_EU)
 			report_eu_error(dev, ch, current_desc_hdr(dev, ch));
-		if (v_lo & TALITOS_CCPSR_LO_GB)
-			dev_err(dev, "gather boundary error\n");
-		if (v_lo & TALITOS_CCPSR_LO_GRL)
-			dev_err(dev, "gather return/length error\n");
-		if (v_lo & TALITOS_CCPSR_LO_SB)
-			dev_err(dev, "scatter boundary error\n");
-		if (v_lo & TALITOS_CCPSR_LO_SRL)
-			dev_err(dev, "scatter return/length error\n");
+		if (!is_sec1) {
+			if (v_lo & TALITOS_CCPSR_LO_GB)
+				dev_err(dev, "gather boundary error\n");
+			if (v_lo & TALITOS_CCPSR_LO_GRL)
+				dev_err(dev, "gather return/length error\n");
+			if (v_lo & TALITOS_CCPSR_LO_SB)
+				dev_err(dev, "scatter boundary error\n");
+			if (v_lo & TALITOS_CCPSR_LO_SRL)
+				dev_err(dev, "scatter return/length error\n");
+		}
 
 		flush_channel(dev, ch, error, reset_ch);
 
@@ -495,10 +604,10 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
 			reset_channel(dev, ch);
 		} else {
 			setbits32(priv->chan[ch].reg + TALITOS_CCCR,
-				  TALITOS_CCCR_CONT);
+				  TALITOS2_CCCR_CONT);
 			setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, 0);
 			while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
-			       TALITOS_CCCR_CONT) && --timeout)
+			       TALITOS2_CCCR_CONT) && --timeout)
 				cpu_relax();
 			if (timeout == 0) {
 				dev_err(dev, "failed to restart channel %d\n",
@@ -507,9 +616,14 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
 			}
 		}
 	}
-	if (reset_dev || isr & ~TALITOS_ISR_4CHERR || isr_lo) {
-		dev_err(dev, "done overflow, internal time out, or rngu error: "
-		        "ISR 0x%08x_%08x\n", isr, isr_lo);
+	if (reset_dev || (is_sec1 && isr & ~TALITOS1_ISR_4CHERR) ||
+	    (!is_sec1 && isr & ~TALITOS2_ISR_4CHERR) || isr_lo) {
+		if (is_sec1 && (isr_lo & TALITOS1_ISR_TEA_ERR))
+			dev_err(dev, "TEA error: ISR 0x%08x_%08x\n",
+				isr, isr_lo);
+		else
+			dev_err(dev, "done overflow, internal time out, or "
+				"rngu error: ISR 0x%08x_%08x\n", isr, isr_lo);
 
 		/* purge request queues */
 		for (ch = 0; ch < priv->num_channels; ch++)
@@ -520,8 +634,43 @@ static void talitos_error(struct device *dev, u32 isr, u32 isr_lo)
 	}
 }
 
-#define DEF_TALITOS_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet)	       \
-static irqreturn_t talitos_interrupt_##name(int irq, void *data)	       \
+#define DEF_TALITOS1_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet)	       \
+static irqreturn_t talitos1_interrupt_##name(int irq, void *data)	       \
+{									       \
+	struct device *dev = data;					       \
+	struct talitos_private *priv = dev_get_drvdata(dev);		       \
+	u32 isr, isr_lo;						       \
+	unsigned long flags;						       \
+									       \
+	spin_lock_irqsave(&priv->reg_lock, flags);			       \
+	isr = in_be32(priv->reg + TALITOS_ISR);				       \
+	isr_lo = in_be32(priv->reg + TALITOS_ISR_LO);			       \
+	/* Acknowledge interrupt */					       \
+	out_be32(priv->reg + TALITOS_ICR, isr & (ch_done_mask | ch_err_mask)); \
+	out_be32(priv->reg + TALITOS_ICR_LO, isr_lo);			       \
+									       \
+	if (unlikely(isr & ch_err_mask || isr_lo & TALITOS1_IMR_LO_INIT)) {    \
+		spin_unlock_irqrestore(&priv->reg_lock, flags);		       \
+		talitos_error(dev, isr & ch_err_mask, isr_lo);		       \
+	}								       \
+	else {								       \
+		if (likely(isr & ch_done_mask)) {			       \
+			/* mask further done interrupts. */		       \
+			setbits32(priv->reg + TALITOS_IMR, ch_done_mask);      \
+			/* done_task will unmask done interrupts at exit */    \
+			tasklet_schedule(&priv->done_task[tlet]);	       \
+		}							       \
+		spin_unlock_irqrestore(&priv->reg_lock, flags);		       \
+	}								       \
+									       \
+	return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED :  \
+								IRQ_NONE;      \
+}
+
+DEF_TALITOS1_INTERRUPT(4ch, TALITOS1_ISR_4CHDONE, TALITOS1_ISR_4CHERR, 0)
+
+#define DEF_TALITOS2_INTERRUPT(name, ch_done_mask, ch_err_mask, tlet)	       \
+static irqreturn_t talitos2_interrupt_##name(int irq, void *data)	       \
 {									       \
 	struct device *dev = data;					       \
 	struct talitos_private *priv = dev_get_drvdata(dev);		       \
@@ -552,9 +701,12 @@ static irqreturn_t talitos_interrupt_##name(int irq, void *data)	       \
 	return (isr & (ch_done_mask | ch_err_mask) || isr_lo) ? IRQ_HANDLED :  \
 								IRQ_NONE;      \
 }
-DEF_TALITOS_INTERRUPT(4ch, TALITOS_ISR_4CHDONE, TALITOS_ISR_4CHERR, 0)
-DEF_TALITOS_INTERRUPT(ch0_2, TALITOS_ISR_CH_0_2_DONE, TALITOS_ISR_CH_0_2_ERR, 0)
-DEF_TALITOS_INTERRUPT(ch1_3, TALITOS_ISR_CH_1_3_DONE, TALITOS_ISR_CH_1_3_ERR, 1)
+
+DEF_TALITOS2_INTERRUPT(4ch, TALITOS2_ISR_4CHDONE, TALITOS2_ISR_4CHERR, 0)
+DEF_TALITOS2_INTERRUPT(ch0_2, TALITOS2_ISR_CH_0_2_DONE, TALITOS2_ISR_CH_0_2_ERR,
+		       0)
+DEF_TALITOS2_INTERRUPT(ch1_3, TALITOS2_ISR_CH_1_3_DONE, TALITOS2_ISR_CH_1_3_ERR,
+		       1)
 
 /*
  * hwrng
@@ -567,7 +719,7 @@ static int talitos_rng_data_present(struct hwrng *rng, int wait)
 	int i;
 
 	for (i = 0; i < 20; i++) {
-		ofl = in_be32(priv->reg + TALITOS_RNGUSR_LO) &
+		ofl = in_be32(priv->reg_rngu + TALITOS_EUSR_LO) &
 		      TALITOS_RNGUSR_LO_OFL;
 		if (ofl || !wait)
 			break;
@@ -583,8 +735,8 @@ static int talitos_rng_data_read(struct hwrng *rng, u32 *data)
 	struct talitos_private *priv = dev_get_drvdata(dev);
 
 	/* rng fifo requires 64-bit accesses */
-	*data = in_be32(priv->reg + TALITOS_RNGU_FIFO);
-	*data = in_be32(priv->reg + TALITOS_RNGU_FIFO_LO);
+	*data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO);
+	*data = in_be32(priv->reg_rngu + TALITOS_EU_FIFO_LO);
 
 	return sizeof(u32);
 }
@@ -595,8 +747,9 @@ static int talitos_rng_init(struct hwrng *rng)
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	unsigned int timeout = TALITOS_TIMEOUT;
 
-	setbits32(priv->reg + TALITOS_RNGURCR_LO, TALITOS_RNGURCR_LO_SR);
-	while (!(in_be32(priv->reg + TALITOS_RNGUSR_LO) & TALITOS_RNGUSR_LO_RD)
+	setbits32(priv->reg_rngu + TALITOS_EURCR_LO, TALITOS_RNGURCR_LO_SR);
+	while (!(in_be32(priv->reg_rngu + TALITOS_EUSR_LO)
+		 & TALITOS_RNGUSR_LO_RD)
 	       && --timeout)
 		cpu_relax();
 	if (timeout == 0) {
@@ -605,7 +758,7 @@ static int talitos_rng_init(struct hwrng *rng)
 	}
 
 	/* start generating */
-	setbits32(priv->reg + TALITOS_RNGUDSR_LO, 0);
+	setbits32(priv->reg_rngu + TALITOS_EUDSR_LO, 0);
 
 	return 0;
 }
@@ -661,7 +814,7 @@ struct talitos_ahash_req_ctx {
 	unsigned int first;
 	unsigned int last;
 	unsigned int to_hash_later;
-	u64 nbuf;
+	unsigned int nbuf;
 	struct scatterlist bufsl[2];
 	struct scatterlist *psrc;
 };
@@ -712,9 +865,10 @@ badkey:
  * @dst_chained: whether dst is chained or not
  * @iv_dma: dma address of iv for checking continuity and link table
  * @dma_len: length of dma mapped link_tbl space
- * @dma_link_tbl: bus physical address of link_tbl
+ * @dma_link_tbl: bus physical address of link_tbl/buf
  * @desc: h/w descriptor
- * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1)
+ * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
+ * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1)
  *
  * if decrypting (with authcheck), or either one of src_nents or dst_nents
  * is greater than 1, an integrity check value is concatenated to the end
@@ -731,7 +885,10 @@ struct talitos_edesc {
 	int dma_len;
 	dma_addr_t dma_link_tbl;
 	struct talitos_desc desc;
-	struct talitos_ptr link_tbl[0];
+	union {
+		struct talitos_ptr link_tbl[0];
+		u8 buf[0];
+	};
 };
 
 static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
@@ -907,8 +1064,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
 {
 	int n_sg = sg_count;
 
-	while (n_sg--) {
-		to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg));
+	while (sg && n_sg--) {
+		to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0);
 		link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg));
 		link_tbl_ptr->j_extent = 0;
 		link_tbl_ptr++;
@@ -925,7 +1082,8 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count,
 		sg_count--;
 		link_tbl_ptr--;
 	}
-	be16_add_cpu(&link_tbl_ptr->len, cryptlen);
+	link_tbl_ptr->len = cpu_to_be16(be16_to_cpu(link_tbl_ptr->len)
+					+ cryptlen);
 
 	/* tag end of link table */
 	link_tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
@@ -953,7 +1111,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 
 	/* hmac key */
 	map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key,
-			       0, DMA_TO_DEVICE);
+			       DMA_TO_DEVICE);
 
 	/* hmac data */
 	desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize);
@@ -962,7 +1120,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
 		to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off *
-			       sizeof(struct talitos_ptr));
+			       sizeof(struct talitos_ptr), 0);
 		desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP;
 
 		/* assoc_nents - 1 entries for assoc, 1 for IV */
@@ -973,7 +1131,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		tbl_ptr += sg_count - 1;
 		tbl_ptr->j_extent = 0;
 		tbl_ptr++;
-		to_talitos_ptr(tbl_ptr, edesc->iv_dma);
+		to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0);
 		tbl_ptr->len = cpu_to_be16(ivsize);
 		tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
 
@@ -982,14 +1140,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	} else {
 		if (areq->assoclen)
 			to_talitos_ptr(&desc->ptr[1],
-				       sg_dma_address(areq->assoc));
+				       sg_dma_address(areq->assoc), 0);
 		else
-			to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
+			to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, 0);
 		desc->ptr[1].j_extent = 0;
 	}
 
 	/* cipher iv */
-	to_talitos_ptr(&desc->ptr[2], edesc->iv_dma);
+	to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0);
 	desc->ptr[2].len = cpu_to_be16(ivsize);
 	desc->ptr[2].j_extent = 0;
 	/* Sync needed for the aead_givencrypt case */
@@ -997,7 +1155,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 
 	/* cipher key */
 	map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen,
-			       (char *)&ctx->key + ctx->authkeylen, 0,
+			       (char *)&ctx->key + ctx->authkeylen,
 			       DMA_TO_DEVICE);
 
 	/*
@@ -1015,7 +1173,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 				  edesc->src_chained);
 
 	if (sg_count == 1) {
-		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src));
+		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0);
 	} else {
 		sg_link_tbl_len = cryptlen;
 
@@ -1026,14 +1184,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 					  &edesc->link_tbl[0]);
 		if (sg_count > 1) {
 			desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
-			to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl);
+			to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl, 0);
 			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
 						   edesc->dma_len,
 						   DMA_BIDIRECTIONAL);
 		} else {
 			/* Only one segment now, so no link tbl needed */
 			to_talitos_ptr(&desc->ptr[4],
-				       sg_dma_address(areq->src));
+				       sg_dma_address(areq->src), 0);
 		}
 	}
 
@@ -1047,13 +1205,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 					  DMA_FROM_DEVICE, edesc->dst_chained);
 
 	if (sg_count == 1) {
-		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst));
+		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0);
 	} else {
 		int tbl_off = edesc->src_nents + 1;
 		struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
 		to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl +
-			       tbl_off * sizeof(struct talitos_ptr));
+			       tbl_off * sizeof(struct talitos_ptr), 0);
 		sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen,
 					  tbl_ptr);
 
@@ -1068,14 +1226,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl +
 			       (tbl_off + edesc->dst_nents + 1 +
 				edesc->assoc_nents) *
-			       sizeof(struct talitos_ptr));
+			       sizeof(struct talitos_ptr), 0);
 		desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP;
 		dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
 					   edesc->dma_len, DMA_BIDIRECTIONAL);
 	}
 
 	/* iv out */
-	map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0,
+	map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,
 			       DMA_FROM_DEVICE);
 
 	ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
@@ -1095,7 +1253,7 @@ static int sg_count(struct scatterlist *sg_list, int nbytes, bool *chained)
 	int sg_nents = 0;
 
 	*chained = false;
-	while (nbytes > 0) {
+	while (nbytes > 0 && sg) {
 		sg_nents++;
 		nbytes -= sg->length;
 		if (!sg_is_last(sg) && (sg + 1)->length == 0)
@@ -1128,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 	dma_addr_t iv_dma = 0;
 	gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
 		      GFP_ATOMIC;
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+	int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN;
 
-	if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) {
+	if (cryptlen + authsize > max_len) {
 		dev_err(dev, "length exceeds h/w max limit\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -1173,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev,
 	 */
 	alloc_len = sizeof(struct talitos_edesc);
 	if (assoc_nents || src_nents || dst_nents) {
-		dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
-			  sizeof(struct talitos_ptr) + authsize;
+		if (is_sec1)
+			dma_len = (src_nents ? cryptlen : 0) +
+				  (dst_nents ? cryptlen : 0);
+		else
+			dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
+				  sizeof(struct talitos_ptr) + authsize;
 		alloc_len += dma_len;
 	} else {
 		dma_len = 0;
@@ -1327,16 +1492,43 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	return 0;
 }
 
+static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src,
+				 struct scatterlist *dst, unsigned int len,
+				 struct talitos_edesc *edesc)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+
+	if (is_sec1) {
+		if (!edesc->src_nents) {
+			dma_unmap_sg(dev, src, 1,
+				     dst != src ? DMA_TO_DEVICE
+						: DMA_BIDIRECTIONAL);
+		}
+		if (dst && edesc->dst_nents) {
+			dma_sync_single_for_device(dev,
+						   edesc->dma_link_tbl + len,
+						   len, DMA_FROM_DEVICE);
+			sg_copy_from_buffer(dst, edesc->dst_nents ? : 1,
+					    edesc->buf + len, len);
+		} else if (dst && dst != src) {
+			dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE);
+		}
+	} else {
+		talitos_sg_unmap(dev, edesc, src, dst);
+	}
+}
+
 static void common_nonsnoop_unmap(struct device *dev,
 				  struct talitos_edesc *edesc,
 				  struct ablkcipher_request *areq)
 {
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+
+	unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc);
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE);
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
 
-	talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
-
 	if (edesc->dma_len)
 		dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
 				 DMA_BIDIRECTIONAL);
@@ -1358,6 +1550,102 @@ static void ablkcipher_done(struct device *dev,
 	areq->base.complete(&areq->base, err);
 }
 
+int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src,
+			  unsigned int len, struct talitos_edesc *edesc,
+			  enum dma_data_direction dir, struct talitos_ptr *ptr)
+{
+	int sg_count;
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+
+	to_talitos_ptr_len(ptr, len, is_sec1);
+
+	if (is_sec1) {
+		sg_count = edesc->src_nents ? : 1;
+
+		if (sg_count == 1) {
+			dma_map_sg(dev, src, 1, dir);
+			to_talitos_ptr(ptr, sg_dma_address(src), is_sec1);
+		} else {
+			sg_copy_to_buffer(src, sg_count, edesc->buf, len);
+			to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1);
+			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+						   len, DMA_TO_DEVICE);
+		}
+	} else {
+		to_talitos_ptr_extent_clear(ptr, is_sec1);
+
+		sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
+					  edesc->src_chained);
+
+		if (sg_count == 1) {
+			to_talitos_ptr(ptr, sg_dma_address(src), is_sec1);
+		} else {
+			sg_count = sg_to_link_tbl(src, sg_count, len,
+						  &edesc->link_tbl[0]);
+			if (sg_count > 1) {
+				to_talitos_ptr(ptr, edesc->dma_link_tbl, 0);
+				ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+				dma_sync_single_for_device(dev,
+							   edesc->dma_link_tbl,
+							   edesc->dma_len,
+							   DMA_BIDIRECTIONAL);
+			} else {
+				/* Only one segment now, so no link tbl needed*/
+				to_talitos_ptr(ptr, sg_dma_address(src),
+					       is_sec1);
+			}
+		}
+	}
+	return sg_count;
+}
+
+void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst,
+			    unsigned int len, struct talitos_edesc *edesc,
+			    enum dma_data_direction dir,
+			    struct talitos_ptr *ptr, int sg_count)
+{
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
+
+	if (dir != DMA_NONE)
+		sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
+					  dir, edesc->dst_chained);
+
+	to_talitos_ptr_len(ptr, len, is_sec1);
+
+	if (is_sec1) {
+		if (sg_count == 1) {
+			if (dir != DMA_NONE)
+				dma_map_sg(dev, dst, 1, dir);
+			to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1);
+		} else {
+			to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1);
+			dma_sync_single_for_device(dev,
+						   edesc->dma_link_tbl + len,
+						   len, DMA_FROM_DEVICE);
+		}
+	} else {
+		to_talitos_ptr_extent_clear(ptr, is_sec1);
+
+		if (sg_count == 1) {
+			to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1);
+		} else {
+			struct talitos_ptr *link_tbl_ptr =
+				&edesc->link_tbl[edesc->src_nents + 1];
+
+			to_talitos_ptr(ptr, edesc->dma_link_tbl +
+					    (edesc->src_nents + 1) *
+					     sizeof(struct talitos_ptr), 0);
+			ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+			sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr);
+			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+						   edesc->dma_len,
+						   DMA_BIDIRECTIONAL);
+		}
+	}
+}
+
 static int common_nonsnoop(struct talitos_edesc *edesc,
 			   struct ablkcipher_request *areq,
 			   void (*callback) (struct device *dev,
@@ -1371,83 +1659,41 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 	unsigned int cryptlen = areq->nbytes;
 	unsigned int ivsize = crypto_ablkcipher_ivsize(cipher);
 	int sg_count, ret;
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	/* first DWORD empty */
-	desc->ptr[0].len = 0;
-	to_talitos_ptr(&desc->ptr[0], 0);
-	desc->ptr[0].j_extent = 0;
+	desc->ptr[0] = zero_entry;
 
 	/* cipher iv */
-	to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
-	desc->ptr[1].len = cpu_to_be16(ivsize);
-	desc->ptr[1].j_extent = 0;
+	to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1);
+	to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1);
+	to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1);
 
 	/* cipher key */
 	map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-			       (char *)&ctx->key, 0, DMA_TO_DEVICE);
+			       (char *)&ctx->key, DMA_TO_DEVICE);
 
 	/*
 	 * cipher in
 	 */
-	desc->ptr[3].len = cpu_to_be16(cryptlen);
-	desc->ptr[3].j_extent = 0;
-
-	sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
-				  (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
-							   : DMA_TO_DEVICE,
-				  edesc->src_chained);
-
-	if (sg_count == 1) {
-		to_talitos_ptr(&desc->ptr[3], sg_dma_address(areq->src));
-	} else {
-		sg_count = sg_to_link_tbl(areq->src, sg_count, cryptlen,
-					  &edesc->link_tbl[0]);
-		if (sg_count > 1) {
-			to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl);
-			desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
-			dma_sync_single_for_device(dev, edesc->dma_link_tbl,
-						   edesc->dma_len,
-						   DMA_BIDIRECTIONAL);
-		} else {
-			/* Only one segment now, so no link tbl needed */
-			to_talitos_ptr(&desc->ptr[3],
-				       sg_dma_address(areq->src));
-		}
-	}
+	sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc,
+					 (areq->src == areq->dst) ?
+					  DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
+					  &desc->ptr[3]);
 
 	/* cipher out */
-	desc->ptr[4].len = cpu_to_be16(cryptlen);
-	desc->ptr[4].j_extent = 0;
-
-	if (areq->src != areq->dst)
-		sg_count = talitos_map_sg(dev, areq->dst,
-					  edesc->dst_nents ? : 1,
-					  DMA_FROM_DEVICE, edesc->dst_chained);
-
-	if (sg_count == 1) {
-		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->dst));
-	} else {
-		struct talitos_ptr *link_tbl_ptr =
-			&edesc->link_tbl[edesc->src_nents + 1];
-
-		to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl +
-					      (edesc->src_nents + 1) *
-					      sizeof(struct talitos_ptr));
-		desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
-		sg_count = sg_to_link_tbl(areq->dst, sg_count, cryptlen,
-					  link_tbl_ptr);
-		dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl,
-					   edesc->dma_len, DMA_BIDIRECTIONAL);
-	}
+	map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc,
+			       (areq->src == areq->dst) ? DMA_NONE
+							: DMA_FROM_DEVICE,
+			       &desc->ptr[4], sg_count);
 
 	/* iv out */
-	map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0,
+	map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv,
 			       DMA_FROM_DEVICE);
 
 	/* last DWORD empty */
-	desc->ptr[6].len = 0;
-	to_talitos_ptr(&desc->ptr[6], 0);
-	desc->ptr[6].j_extent = 0;
+	desc->ptr[6] = zero_entry;
 
 	ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
 	if (ret != -EINPROGRESS) {
@@ -1507,20 +1753,22 @@ static void common_nonsnoop_hash_unmap(struct device *dev,
 				       struct ahash_request *areq)
 {
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
 
+	unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc);
+
 	/* When using hashctx-in, must unmap it. */
-	if (edesc->desc.ptr[1].len)
+	if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1))
 		unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
 					 DMA_TO_DEVICE);
 
-	if (edesc->desc.ptr[2].len)
+	if (from_talitos_ptr_len(&edesc->desc.ptr[2], is_sec1))
 		unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2],
 					 DMA_TO_DEVICE);
 
-	talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL);
-
 	if (edesc->dma_len)
 		dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
 				 DMA_BIDIRECTIONAL);
@@ -1548,6 +1796,27 @@ static void ahash_done(struct device *dev,
 	areq->base.complete(&areq->base, err);
 }
 
+/*
+ * SEC1 doesn't like hashing of 0 sized message, so we do the padding
+ * ourself and submit a padded block
+ */
+void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
+			       struct talitos_edesc *edesc,
+			       struct talitos_ptr *ptr)
+{
+	static u8 padded_hash[64] = {
+		0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	};
+
+	pr_err_once("Bug in SEC1, padding ourself\n");
+	edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD;
+	map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash),
+			       (char *)padded_hash, DMA_TO_DEVICE);
+}
+
 static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 				struct ahash_request *areq, unsigned int length,
 				void (*callback) (struct device *dev,
@@ -1559,7 +1828,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	struct talitos_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
 	struct device *dev = ctx->dev;
 	struct talitos_desc *desc = &edesc->desc;
-	int sg_count, ret;
+	int ret;
+	struct talitos_private *priv = dev_get_drvdata(dev);
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	/* first DWORD empty */
 	desc->ptr[0] = zero_entry;
@@ -1568,7 +1839,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	if (!req_ctx->first || req_ctx->swinit) {
 		map_single_talitos_ptr(dev, &desc->ptr[1],
 				       req_ctx->hw_context_size,
-				       (char *)req_ctx->hw_context, 0,
+				       (char *)req_ctx->hw_context,
 				       DMA_TO_DEVICE);
 		req_ctx->swinit = 0;
 	} else {
@@ -1580,38 +1851,15 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	/* HMAC key */
 	if (ctx->keylen)
 		map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-				       (char *)&ctx->key, 0, DMA_TO_DEVICE);
+				       (char *)&ctx->key, DMA_TO_DEVICE);
 	else
 		desc->ptr[2] = zero_entry;
 
 	/*
 	 * data in
 	 */
-	desc->ptr[3].len = cpu_to_be16(length);
-	desc->ptr[3].j_extent = 0;
-
-	sg_count = talitos_map_sg(dev, req_ctx->psrc,
-				  edesc->src_nents ? : 1,
-				  DMA_TO_DEVICE, edesc->src_chained);
-
-	if (sg_count == 1) {
-		to_talitos_ptr(&desc->ptr[3], sg_dma_address(req_ctx->psrc));
-	} else {
-		sg_count = sg_to_link_tbl(req_ctx->psrc, sg_count, length,
-					  &edesc->link_tbl[0]);
-		if (sg_count > 1) {
-			desc->ptr[3].j_extent |= DESC_PTR_LNKTBL_JUMP;
-			to_talitos_ptr(&desc->ptr[3], edesc->dma_link_tbl);
-			dma_sync_single_for_device(ctx->dev,
-						   edesc->dma_link_tbl,
-						   edesc->dma_len,
-						   DMA_BIDIRECTIONAL);
-		} else {
-			/* Only one segment now, so no link tbl needed */
-			to_talitos_ptr(&desc->ptr[3],
-				       sg_dma_address(req_ctx->psrc));
-		}
-	}
+	map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc,
+			      DMA_TO_DEVICE, &desc->ptr[3]);
 
 	/* fifth DWORD empty */
 	desc->ptr[4] = zero_entry;
@@ -1620,15 +1868,18 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc,
 	if (req_ctx->last)
 		map_single_talitos_ptr(dev, &desc->ptr[5],
 				       crypto_ahash_digestsize(tfm),
-				       areq->result, 0, DMA_FROM_DEVICE);
+				       areq->result, DMA_FROM_DEVICE);
 	else
 		map_single_talitos_ptr(dev, &desc->ptr[5],
 				       req_ctx->hw_context_size,
-				       req_ctx->hw_context, 0, DMA_FROM_DEVICE);
+				       req_ctx->hw_context, DMA_FROM_DEVICE);
 
 	/* last DWORD empty */
 	desc->ptr[6] = zero_entry;
 
+	if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0)
+		talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
+
 	ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
 	if (ret != -EINPROGRESS) {
 		common_nonsnoop_hash_unmap(dev, edesc, areq);
@@ -2561,6 +2812,7 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev,
 		break;
 	default:
 		dev_err(dev, "unknown algorithm type %d\n", t_alg->algt.type);
+		kfree(t_alg);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -2581,29 +2833,35 @@ static int talitos_probe_irq(struct platform_device *ofdev)
 	struct device_node *np = ofdev->dev.of_node;
 	struct talitos_private *priv = dev_get_drvdata(dev);
 	int err;
+	bool is_sec1 = has_ftr_sec1(priv);
 
 	priv->irq[0] = irq_of_parse_and_map(np, 0);
 	if (!priv->irq[0]) {
 		dev_err(dev, "failed to map irq\n");
 		return -EINVAL;
 	}
+	if (is_sec1) {
+		err = request_irq(priv->irq[0], talitos1_interrupt_4ch, 0,
+				  dev_driver_string(dev), dev);
+		goto primary_out;
+	}
 
 	priv->irq[1] = irq_of_parse_and_map(np, 1);
 
 	/* get the primary irq line */
 	if (!priv->irq[1]) {
-		err = request_irq(priv->irq[0], talitos_interrupt_4ch, 0,
+		err = request_irq(priv->irq[0], talitos2_interrupt_4ch, 0,
 				  dev_driver_string(dev), dev);
 		goto primary_out;
 	}
 
-	err = request_irq(priv->irq[0], talitos_interrupt_ch0_2, 0,
+	err = request_irq(priv->irq[0], talitos2_interrupt_ch0_2, 0,
 			  dev_driver_string(dev), dev);
 	if (err)
 		goto primary_out;
 
 	/* get the secondary irq line */
-	err = request_irq(priv->irq[1], talitos_interrupt_ch1_3, 0,
+	err = request_irq(priv->irq[1], talitos2_interrupt_ch1_3, 0,
 			  dev_driver_string(dev), dev);
 	if (err) {
 		dev_err(dev, "failed to request secondary irq\n");
@@ -2630,6 +2888,7 @@ static int talitos_probe(struct platform_device *ofdev)
 	struct talitos_private *priv;
 	const unsigned int *prop;
 	int i, err;
+	int stride;
 
 	priv = kzalloc(sizeof(struct talitos_private), GFP_KERNEL);
 	if (!priv)
@@ -2643,20 +2902,6 @@ static int talitos_probe(struct platform_device *ofdev)
 
 	spin_lock_init(&priv->reg_lock);
 
-	err = talitos_probe_irq(ofdev);
-	if (err)
-		goto err_out;
-
-	if (!priv->irq[1]) {
-		tasklet_init(&priv->done_task[0], talitos_done_4ch,
-			     (unsigned long)dev);
-	} else {
-		tasklet_init(&priv->done_task[0], talitos_done_ch0_2,
-			     (unsigned long)dev);
-		tasklet_init(&priv->done_task[1], talitos_done_ch1_3,
-			     (unsigned long)dev);
-	}
-
 	priv->reg = of_iomap(np, 0);
 	if (!priv->reg) {
 		dev_err(dev, "failed to of_iomap\n");
@@ -2696,6 +2941,53 @@ static int talitos_probe(struct platform_device *ofdev)
 				  TALITOS_FTR_SHA224_HWINIT |
 				  TALITOS_FTR_HMAC_OK;
 
+	if (of_device_is_compatible(np, "fsl,sec1.0"))
+		priv->features |= TALITOS_FTR_SEC1;
+
+	if (of_device_is_compatible(np, "fsl,sec1.2")) {
+		priv->reg_deu = priv->reg + TALITOS12_DEU;
+		priv->reg_aesu = priv->reg + TALITOS12_AESU;
+		priv->reg_mdeu = priv->reg + TALITOS12_MDEU;
+		stride = TALITOS1_CH_STRIDE;
+	} else if (of_device_is_compatible(np, "fsl,sec1.0")) {
+		priv->reg_deu = priv->reg + TALITOS10_DEU;
+		priv->reg_aesu = priv->reg + TALITOS10_AESU;
+		priv->reg_mdeu = priv->reg + TALITOS10_MDEU;
+		priv->reg_afeu = priv->reg + TALITOS10_AFEU;
+		priv->reg_rngu = priv->reg + TALITOS10_RNGU;
+		priv->reg_pkeu = priv->reg + TALITOS10_PKEU;
+		stride = TALITOS1_CH_STRIDE;
+	} else {
+		priv->reg_deu = priv->reg + TALITOS2_DEU;
+		priv->reg_aesu = priv->reg + TALITOS2_AESU;
+		priv->reg_mdeu = priv->reg + TALITOS2_MDEU;
+		priv->reg_afeu = priv->reg + TALITOS2_AFEU;
+		priv->reg_rngu = priv->reg + TALITOS2_RNGU;
+		priv->reg_pkeu = priv->reg + TALITOS2_PKEU;
+		priv->reg_keu = priv->reg + TALITOS2_KEU;
+		priv->reg_crcu = priv->reg + TALITOS2_CRCU;
+		stride = TALITOS2_CH_STRIDE;
+	}
+
+	err = talitos_probe_irq(ofdev);
+	if (err)
+		goto err_out;
+
+	if (of_device_is_compatible(np, "fsl,sec1.0")) {
+		tasklet_init(&priv->done_task[0], talitos1_done_4ch,
+			     (unsigned long)dev);
+	} else {
+		if (!priv->irq[1]) {
+			tasklet_init(&priv->done_task[0], talitos2_done_4ch,
+				     (unsigned long)dev);
+		} else {
+			tasklet_init(&priv->done_task[0], talitos2_done_ch0_2,
+				     (unsigned long)dev);
+			tasklet_init(&priv->done_task[1], talitos2_done_ch1_3,
+				     (unsigned long)dev);
+		}
+	}
+
 	priv->chan = kzalloc(sizeof(struct talitos_channel) *
 			     priv->num_channels, GFP_KERNEL);
 	if (!priv->chan) {
@@ -2707,7 +2999,7 @@ static int talitos_probe(struct platform_device *ofdev)
 	priv->fifo_len = roundup_pow_of_two(priv->chfifo_len);
 
 	for (i = 0; i < priv->num_channels; i++) {
-		priv->chan[i].reg = priv->reg + TALITOS_CH_STRIDE * (i + 1);
+		priv->chan[i].reg = priv->reg + stride * (i + 1);
 		if (!priv->irq[1] || !(i & 1))
 			priv->chan[i].reg += TALITOS_CH_BASE_OFFSET;
 
@@ -2794,9 +3086,16 @@ err_out:
 }
 
 static const struct of_device_id talitos_match[] = {
+#ifdef CONFIG_CRYPTO_DEV_TALITOS1
+	{
+		.compatible = "fsl,sec1.0",
+	},
+#endif
+#ifdef CONFIG_CRYPTO_DEV_TALITOS2
 	{
 		.compatible = "fsl,sec2.0",
 	},
+#endif
 	{},
 };
 MODULE_DEVICE_TABLE(of, talitos_match);
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 61a1405..314daf5 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -29,7 +29,8 @@
  */
 
 #define TALITOS_TIMEOUT 100000
-#define TALITOS_MAX_DATA_LEN 65535
+#define TALITOS1_MAX_DATA_LEN 32768
+#define TALITOS2_MAX_DATA_LEN 65535
 
 #define DESC_TYPE(desc_hdr) ((be32_to_cpu(desc_hdr) >> 3) & 0x1f)
 #define PRIMARY_EU(desc_hdr) ((be32_to_cpu(desc_hdr) >> 28) & 0xf)
@@ -37,9 +38,17 @@
 
 /* descriptor pointer entry */
 struct talitos_ptr {
-	__be16 len;     /* length */
-	u8 j_extent;    /* jump to sg link table and/or extent */
-	u8 eptr;        /* extended address */
+	union {
+		struct {		/* SEC2 format */
+			__be16 len;     /* length */
+			u8 j_extent;    /* jump to sg link table and/or extent*/
+			u8 eptr;        /* extended address */
+		};
+		struct {			/* SEC1 format */
+			__be16 res;
+			__be16 len1;	/* length */
+		};
+	};
 	__be32 ptr;     /* address */
 };
 
@@ -53,10 +62,16 @@ static const struct talitos_ptr zero_entry = {
 /* descriptor */
 struct talitos_desc {
 	__be32 hdr;                     /* header high bits */
-	__be32 hdr_lo;                  /* header low bits */
+	union {
+		__be32 hdr_lo;		/* header low bits */
+		__be32 hdr1;		/* header for SEC1 */
+	};
 	struct talitos_ptr ptr[7];      /* ptr/len pair array */
+	__be32 next_desc;		/* next descriptor (SEC1) */
 };
 
+#define TALITOS_DESC_SIZE	(sizeof(struct talitos_desc) - sizeof(__be32))
+
 /**
  * talitos_request - descriptor submission request
  * @desc: descriptor pointer (kernel virtual)
@@ -97,6 +112,14 @@ struct talitos_private {
 	struct device *dev;
 	struct platform_device *ofdev;
 	void __iomem *reg;
+	void __iomem *reg_deu;
+	void __iomem *reg_aesu;
+	void __iomem *reg_mdeu;
+	void __iomem *reg_afeu;
+	void __iomem *reg_rngu;
+	void __iomem *reg_pkeu;
+	void __iomem *reg_keu;
+	void __iomem *reg_crcu;
 	int irq[2];
 
 	/* SEC global registers lock  */
@@ -144,49 +167,80 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 #define TALITOS_FTR_HW_AUTH_CHECK 0x00000002
 #define TALITOS_FTR_SHA224_HWINIT 0x00000004
 #define TALITOS_FTR_HMAC_OK 0x00000008
+#define TALITOS_FTR_SEC1 0x00000010
+
+/*
+ * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are
+ * defined, we check the features which are set according to the device tree.
+ * Otherwise, we answer true or false directly
+ */
+static inline bool has_ftr_sec1(struct talitos_private *priv)
+{
+#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2)
+	return priv->features & TALITOS_FTR_SEC1 ? true : false;
+#elif defined(CONFIG_CRYPTO_DEV_TALITOS1)
+	return true;
+#else
+	return false;
+#endif
+}
 
 /*
  * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register
  */
 
+#define ISR1_FORMAT(x)			(((x) << 28) | ((x) << 16))
+#define ISR2_FORMAT(x)			(((x) << 4) | (x))
+
 /* global register offset addresses */
 #define TALITOS_MCR			0x1030  /* master control register */
 #define   TALITOS_MCR_RCA0		(1 << 15) /* remap channel 0 */
 #define   TALITOS_MCR_RCA1		(1 << 14) /* remap channel 1 */
 #define   TALITOS_MCR_RCA2		(1 << 13) /* remap channel 2 */
 #define   TALITOS_MCR_RCA3		(1 << 12) /* remap channel 3 */
-#define   TALITOS_MCR_SWR		0x1     /* s/w reset */
+#define   TALITOS1_MCR_SWR		0x1000000     /* s/w reset */
+#define   TALITOS2_MCR_SWR		0x1     /* s/w reset */
 #define TALITOS_MCR_LO			0x1034
 #define TALITOS_IMR			0x1008  /* interrupt mask register */
-#define   TALITOS_IMR_INIT		0x100ff /* enable channel IRQs */
-#define   TALITOS_IMR_DONE		0x00055 /* done IRQs */
+/* enable channel IRQs */
+#define   TALITOS1_IMR_INIT		ISR1_FORMAT(0xf)
+#define   TALITOS1_IMR_DONE		ISR1_FORMAT(0x5) /* done IRQs */
+/* enable channel IRQs */
+#define   TALITOS2_IMR_INIT		(ISR2_FORMAT(0xf) | 0x10000)
+#define   TALITOS2_IMR_DONE		ISR1_FORMAT(0x5) /* done IRQs */
 #define TALITOS_IMR_LO			0x100C
-#define   TALITOS_IMR_LO_INIT		0x20000 /* allow RNGU error IRQs */
+#define   TALITOS1_IMR_LO_INIT		0x2000000 /* allow RNGU error IRQs */
+#define   TALITOS2_IMR_LO_INIT		0x20000 /* allow RNGU error IRQs */
 #define TALITOS_ISR			0x1010  /* interrupt status register */
-#define   TALITOS_ISR_4CHERR		0xaa    /* 4 channel errors mask */
-#define   TALITOS_ISR_4CHDONE		0x55    /* 4 channel done mask */
-#define   TALITOS_ISR_CH_0_2_ERR	0x22    /* channels 0, 2 errors mask */
-#define   TALITOS_ISR_CH_0_2_DONE	0x11    /* channels 0, 2 done mask */
-#define   TALITOS_ISR_CH_1_3_ERR	0x88    /* channels 1, 3 errors mask */
-#define   TALITOS_ISR_CH_1_3_DONE	0x44    /* channels 1, 3 done mask */
+#define   TALITOS1_ISR_4CHERR		ISR1_FORMAT(0xa) /* 4 ch errors mask */
+#define   TALITOS1_ISR_4CHDONE		ISR1_FORMAT(0x5) /* 4 ch done mask */
+#define   TALITOS1_ISR_TEA_ERR		0x00000040
+#define   TALITOS2_ISR_4CHERR		ISR2_FORMAT(0xa) /* 4 ch errors mask */
+#define   TALITOS2_ISR_4CHDONE		ISR2_FORMAT(0x5) /* 4 ch done mask */
+#define   TALITOS2_ISR_CH_0_2_ERR	ISR2_FORMAT(0x2) /* ch 0, 2 err mask */
+#define   TALITOS2_ISR_CH_0_2_DONE	ISR2_FORMAT(0x1) /* ch 0, 2 done mask */
+#define   TALITOS2_ISR_CH_1_3_ERR	ISR2_FORMAT(0x8) /* ch 1, 3 err mask */
+#define   TALITOS2_ISR_CH_1_3_DONE	ISR2_FORMAT(0x4) /* ch 1, 3 done mask */
 #define TALITOS_ISR_LO			0x1014
 #define TALITOS_ICR			0x1018  /* interrupt clear register */
 #define TALITOS_ICR_LO			0x101C
 
 /* channel register address stride */
 #define TALITOS_CH_BASE_OFFSET		0x1000	/* default channel map base */
-#define TALITOS_CH_STRIDE		0x100
+#define TALITOS1_CH_STRIDE		0x1000
+#define TALITOS2_CH_STRIDE		0x100
 
 /* channel configuration register  */
 #define TALITOS_CCCR			0x8
-#define   TALITOS_CCCR_CONT		0x2    /* channel continue */
-#define   TALITOS_CCCR_RESET		0x1    /* channel reset */
+#define   TALITOS2_CCCR_CONT		0x2    /* channel continue on SEC2 */
+#define   TALITOS2_CCCR_RESET		0x1    /* channel reset on SEC2 */
 #define TALITOS_CCCR_LO			0xc
 #define   TALITOS_CCCR_LO_IWSE		0x80   /* chan. ICCR writeback enab. */
 #define   TALITOS_CCCR_LO_EAE		0x20   /* extended address enable */
 #define   TALITOS_CCCR_LO_CDWE		0x10   /* chan. done writeback enab. */
 #define   TALITOS_CCCR_LO_NT		0x4    /* notification type */
 #define   TALITOS_CCCR_LO_CDIE		0x2    /* channel done IRQ enable */
+#define   TALITOS1_CCCR_LO_RESET	0x1    /* channel reset on SEC1 */
 
 /* CCPSR: channel pointer status register */
 #define TALITOS_CCPSR			0x10
@@ -224,37 +278,48 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc,
 #define TALITOS_SCATTER			0xe0
 #define TALITOS_SCATTER_LO		0xe4
 
+/* execution unit registers base */
+#define TALITOS2_DEU			0x2000
+#define TALITOS2_AESU			0x4000
+#define TALITOS2_MDEU			0x6000
+#define TALITOS2_AFEU			0x8000
+#define TALITOS2_RNGU			0xa000
+#define TALITOS2_PKEU			0xc000
+#define TALITOS2_KEU			0xe000
+#define TALITOS2_CRCU			0xf000
+
+#define TALITOS12_AESU			0x4000
+#define TALITOS12_DEU			0x5000
+#define TALITOS12_MDEU			0x6000
+
+#define TALITOS10_AFEU			0x8000
+#define TALITOS10_DEU			0xa000
+#define TALITOS10_MDEU			0xc000
+#define TALITOS10_RNGU			0xe000
+#define TALITOS10_PKEU			0x10000
+#define TALITOS10_AESU			0x12000
+
 /* execution unit interrupt status registers */
-#define TALITOS_DEUISR			0x2030 /* DES unit */
-#define TALITOS_DEUISR_LO		0x2034
-#define TALITOS_AESUISR			0x4030 /* AES unit */
-#define TALITOS_AESUISR_LO		0x4034
-#define TALITOS_MDEUISR			0x6030 /* message digest unit */
-#define TALITOS_MDEUISR_LO		0x6034
-#define TALITOS_MDEUICR			0x6038 /* interrupt control */
-#define TALITOS_MDEUICR_LO		0x603c
+#define TALITOS_EUDSR			0x10	/* data size */
+#define TALITOS_EUDSR_LO		0x14
+#define TALITOS_EURCR			0x18 /* reset control*/
+#define TALITOS_EURCR_LO		0x1c
+#define TALITOS_EUSR			0x28 /* rng status */
+#define TALITOS_EUSR_LO			0x2c
+#define TALITOS_EUISR			0x30
+#define TALITOS_EUISR_LO		0x34
+#define TALITOS_EUICR			0x38 /* int. control */
+#define TALITOS_EUICR_LO		0x3c
+#define TALITOS_EU_FIFO			0x800 /* output FIFO */
+#define TALITOS_EU_FIFO_LO		0x804 /* output FIFO */
+/* DES unit */
+#define   TALITOS1_DEUICR_KPE		0x00200000 /* Key Parity Error */
+/* message digest unit */
 #define   TALITOS_MDEUICR_LO_ICE	0x4000 /* integrity check IRQ enable */
-#define TALITOS_AFEUISR			0x8030 /* arc4 unit */
-#define TALITOS_AFEUISR_LO		0x8034
-#define TALITOS_RNGUISR			0xa030 /* random number unit */
-#define TALITOS_RNGUISR_LO		0xa034
-#define TALITOS_RNGUSR			0xa028 /* rng status */
-#define TALITOS_RNGUSR_LO		0xa02c
+/* random number unit */
 #define   TALITOS_RNGUSR_LO_RD		0x1	/* reset done */
 #define   TALITOS_RNGUSR_LO_OFL		0xff0000/* output FIFO length */
-#define TALITOS_RNGUDSR			0xa010	/* data size */
-#define TALITOS_RNGUDSR_LO		0xa014
-#define TALITOS_RNGU_FIFO		0xa800	/* output FIFO */
-#define TALITOS_RNGU_FIFO_LO		0xa804	/* output FIFO */
-#define TALITOS_RNGURCR			0xa018	/* reset control */
-#define TALITOS_RNGURCR_LO		0xa01c
 #define   TALITOS_RNGURCR_LO_SR		0x1	/* software reset */
-#define TALITOS_PKEUISR			0xc030 /* public key unit */
-#define TALITOS_PKEUISR_LO		0xc034
-#define TALITOS_KEUISR			0xe030 /* kasumi unit */
-#define TALITOS_KEUISR_LO		0xe034
-#define TALITOS_CRCUISR			0xf030 /* cyclic redundancy check unit*/
-#define TALITOS_CRCUISR_LO		0xf034
 
 #define TALITOS_MDEU_CONTEXT_SIZE_MD5_SHA1_SHA256	0x28
 #define TALITOS_MDEU_CONTEXT_SIZE_SHA384_SHA512		0x48
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
index c699c6e..d28ab96 100644
--- a/drivers/crypto/vmx/Makefile
+++ b/drivers/crypto/vmx/Makefile
@@ -4,7 +4,7 @@ vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o gha
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 TARGET := linux-ppc64le
 else
-TARGET := linux-pcc64
+TARGET := linux-ppc64
 endif
 
 quiet_cmd_perl = PERL $@
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
index 44d8d5c..4c398dd 100644
--- a/drivers/crypto/vmx/vmx.c
+++ b/drivers/crypto/vmx/vmx.c
@@ -82,7 +82,7 @@ module_init(p8_init);
 module_exit(p8_exit);
 
 MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
-MODULE_DESCRIPTION("IBM VMX cryptogaphic acceleration instructions support on Power 8");
+MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions support on Power 8");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0.0");
 
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 94b19be..94141dc 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -18,6 +18,64 @@
 #include <linux/slab.h>
 
 /**
+ * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API
+ *
+ * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD
+ * (listed as type "aead" in /proc/crypto)
+ *
+ * The most prominent examples for this type of encryption is GCM and CCM.
+ * However, the kernel supports other types of AEAD ciphers which are defined
+ * with the following cipher string:
+ *
+ *	authenc(keyed message digest, block cipher)
+ *
+ * For example: authenc(hmac(sha256), cbc(aes))
+ *
+ * The example code provided for the asynchronous block cipher operation
+ * applies here as well. Naturally all *ablkcipher* symbols must be exchanged
+ * the *aead* pendants discussed in the following. In addtion, for the AEAD
+ * operation, the aead_request_set_assoc function must be used to set the
+ * pointer to the associated data memory location before performing the
+ * encryption or decryption operation. In case of an encryption, the associated
+ * data memory is filled during the encryption operation. For decryption, the
+ * associated data memory must contain data that is used to verify the integrity
+ * of the decrypted data. Another deviation from the asynchronous block cipher
+ * operation is that the caller should explicitly check for -EBADMSG of the
+ * crypto_aead_decrypt. That error indicates an authentication error, i.e.
+ * a breach in the integrity of the message. In essence, that -EBADMSG error
+ * code is the key bonus an AEAD cipher has over "standard" block chaining
+ * modes.
+ */
+
+/**
+ *	struct aead_request - AEAD request
+ *	@base: Common attributes for async crypto requests
+ *	@assoclen: Length in bytes of associated data for authentication
+ *	@cryptlen: Length of data to be encrypted or decrypted
+ *	@iv: Initialisation vector
+ *	@assoc: Associated data
+ *	@src: Source data
+ *	@dst: Destination data
+ *	@__ctx: Start of private context data
+ */
+struct aead_request {
+	struct crypto_async_request base;
+
+	bool old;
+
+	unsigned int assoclen;
+	unsigned int cryptlen;
+
+	u8 *iv;
+
+	struct scatterlist *assoc;
+	struct scatterlist *src;
+	struct scatterlist *dst;
+
+	void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+/**
  *	struct aead_givcrypt_request - AEAD request with IV generation
  *	@seq: Sequence number for IV generation
  *	@giv: Space for generated IV
@@ -30,6 +88,453 @@ struct aead_givcrypt_request {
 	struct aead_request areq;
 };
 
+/**
+ * struct aead_alg - AEAD cipher definition
+ * @maxauthsize: Set the maximum authentication tag size supported by the
+ *		 transformation. A transformation may support smaller tag sizes.
+ *		 As the authentication tag is a message digest to ensure the
+ *		 integrity of the encrypted data, a consumer typically wants the
+ *		 largest authentication tag possible as defined by this
+ *		 variable.
+ * @setauthsize: Set authentication size for the AEAD transformation. This
+ *		 function is used to specify the consumer requested size of the
+ * 		 authentication tag to be either generated by the transformation
+ *		 during encryption or the size of the authentication tag to be
+ *		 supplied during the decryption operation. This function is also
+ *		 responsible for checking the authentication tag size for
+ *		 validity.
+ * @setkey: see struct ablkcipher_alg
+ * @encrypt: see struct ablkcipher_alg
+ * @decrypt: see struct ablkcipher_alg
+ * @geniv: see struct ablkcipher_alg
+ * @ivsize: see struct ablkcipher_alg
+ *
+ * All fields except @ivsize is mandatory and must be filled.
+ */
+struct aead_alg {
+	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
+	              unsigned int keylen);
+	int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize);
+	int (*encrypt)(struct aead_request *req);
+	int (*decrypt)(struct aead_request *req);
+
+	const char *geniv;
+
+	unsigned int ivsize;
+	unsigned int maxauthsize;
+
+	struct crypto_alg base;
+};
+
+struct crypto_aead {
+	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
+	              unsigned int keylen);
+	int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize);
+	int (*encrypt)(struct aead_request *req);
+	int (*decrypt)(struct aead_request *req);
+	int (*givencrypt)(struct aead_givcrypt_request *req);
+	int (*givdecrypt)(struct aead_givcrypt_request *req);
+
+	struct crypto_aead *child;
+
+	unsigned int authsize;
+	unsigned int reqsize;
+
+	struct crypto_tfm base;
+};
+
+static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm)
+{
+	return container_of(tfm, struct crypto_aead, base);
+}
+
+/**
+ * crypto_alloc_aead() - allocate AEAD cipher handle
+ * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
+ *	     AEAD cipher
+ * @type: specifies the type of the cipher
+ * @mask: specifies the mask for the cipher
+ *
+ * Allocate a cipher handle for an AEAD. The returned struct
+ * crypto_aead is the cipher handle that is required for any subsequent
+ * API invocation for that AEAD.
+ *
+ * Return: allocated cipher handle in case of success; IS_ERR() is true in case
+ *	   of an error, PTR_ERR() returns the error code.
+ */
+struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask);
+
+static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm)
+{
+	return &tfm->base;
+}
+
+/**
+ * crypto_free_aead() - zeroize and free aead handle
+ * @tfm: cipher handle to be freed
+ */
+static inline void crypto_free_aead(struct crypto_aead *tfm)
+{
+	crypto_destroy_tfm(tfm, crypto_aead_tfm(tfm));
+}
+
+static inline struct crypto_aead *crypto_aead_crt(struct crypto_aead *tfm)
+{
+	return tfm;
+}
+
+static inline struct old_aead_alg *crypto_old_aead_alg(struct crypto_aead *tfm)
+{
+	return &crypto_aead_tfm(tfm)->__crt_alg->cra_aead;
+}
+
+static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm)
+{
+	return container_of(crypto_aead_tfm(tfm)->__crt_alg,
+			    struct aead_alg, base);
+}
+
+static inline unsigned int crypto_aead_alg_ivsize(struct aead_alg *alg)
+{
+	return alg->base.cra_aead.encrypt ? alg->base.cra_aead.ivsize :
+					    alg->ivsize;
+}
+
+/**
+ * crypto_aead_ivsize() - obtain IV size
+ * @tfm: cipher handle
+ *
+ * The size of the IV for the aead referenced by the cipher handle is
+ * returned. This IV size may be zero if the cipher does not need an IV.
+ *
+ * Return: IV size in bytes
+ */
+static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm)
+{
+	return crypto_aead_alg_ivsize(crypto_aead_alg(tfm));
+}
+
+/**
+ * crypto_aead_authsize() - obtain maximum authentication data size
+ * @tfm: cipher handle
+ *
+ * The maximum size of the authentication data for the AEAD cipher referenced
+ * by the AEAD cipher handle is returned. The authentication data size may be
+ * zero if the cipher implements a hard-coded maximum.
+ *
+ * The authentication data may also be known as "tag value".
+ *
+ * Return: authentication data size / tag size in bytes
+ */
+static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm)
+{
+	return tfm->authsize;
+}
+
+/**
+ * crypto_aead_blocksize() - obtain block size of cipher
+ * @tfm: cipher handle
+ *
+ * The block size for the AEAD referenced with the cipher handle is returned.
+ * The caller may use that information to allocate appropriate memory for the
+ * data returned by the encryption or decryption operation
+ *
+ * Return: block size of cipher
+ */
+static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm)
+{
+	return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm));
+}
+
+static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm)
+{
+	return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm));
+}
+
+static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm)
+{
+	return crypto_tfm_get_flags(crypto_aead_tfm(tfm));
+}
+
+static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags)
+{
+	crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags);
+}
+
+static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags)
+{
+	crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags);
+}
+
+/**
+ * crypto_aead_setkey() - set key for cipher
+ * @tfm: cipher handle
+ * @key: buffer holding the key
+ * @keylen: length of the key in bytes
+ *
+ * The caller provided key is set for the AEAD referenced by the cipher
+ * handle.
+ *
+ * Note, the key length determines the cipher type. Many block ciphers implement
+ * different cipher modes depending on the key size, such as AES-128 vs AES-192
+ * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128
+ * is performed.
+ *
+ * Return: 0 if the setting of the key was successful; < 0 if an error occurred
+ */
+int crypto_aead_setkey(struct crypto_aead *tfm,
+		       const u8 *key, unsigned int keylen);
+
+/**
+ * crypto_aead_setauthsize() - set authentication data size
+ * @tfm: cipher handle
+ * @authsize: size of the authentication data / tag in bytes
+ *
+ * Set the authentication data size / tag size. AEAD requires an authentication
+ * tag (or MAC) in addition to the associated data.
+ *
+ * Return: 0 if the setting of the key was successful; < 0 if an error occurred
+ */
+int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize);
+
+static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
+{
+	return __crypto_aead_cast(req->base.tfm);
+}
+
+/**
+ * crypto_aead_encrypt() - encrypt plaintext
+ * @req: reference to the aead_request handle that holds all information
+ *	 needed to perform the cipher operation
+ *
+ * Encrypt plaintext data using the aead_request handle. That data structure
+ * and how it is filled with data is discussed with the aead_request_*
+ * functions.
+ *
+ * IMPORTANT NOTE The encryption operation creates the authentication data /
+ *		  tag. That data is concatenated with the created ciphertext.
+ *		  The ciphertext memory size is therefore the given number of
+ *		  block cipher blocks + the size defined by the
+ *		  crypto_aead_setauthsize invocation. The caller must ensure
+ *		  that sufficient memory is available for the ciphertext and
+ *		  the authentication tag.
+ *
+ * Return: 0 if the cipher operation was successful; < 0 if an error occurred
+ */
+static inline int crypto_aead_encrypt(struct aead_request *req)
+{
+	return crypto_aead_reqtfm(req)->encrypt(req);
+}
+
+/**
+ * crypto_aead_decrypt() - decrypt ciphertext
+ * @req: reference to the ablkcipher_request handle that holds all information
+ *	 needed to perform the cipher operation
+ *
+ * Decrypt ciphertext data using the aead_request handle. That data structure
+ * and how it is filled with data is discussed with the aead_request_*
+ * functions.
+ *
+ * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the
+ *		  authentication data / tag. That authentication data / tag
+ *		  must have the size defined by the crypto_aead_setauthsize
+ *		  invocation.
+ *
+ *
+ * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD
+ *	   cipher operation performs the authentication of the data during the
+ *	   decryption operation. Therefore, the function returns this error if
+ *	   the authentication of the ciphertext was unsuccessful (i.e. the
+ *	   integrity of the ciphertext or the associated data was violated);
+ *	   < 0 if an error occurred.
+ */
+static inline int crypto_aead_decrypt(struct aead_request *req)
+{
+	if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req)))
+		return -EINVAL;
+
+	return crypto_aead_reqtfm(req)->decrypt(req);
+}
+
+/**
+ * DOC: Asynchronous AEAD Request Handle
+ *
+ * The aead_request data structure contains all pointers to data required for
+ * the AEAD cipher operation. This includes the cipher handle (which can be
+ * used by multiple aead_request instances), pointer to plaintext and
+ * ciphertext, asynchronous callback function, etc. It acts as a handle to the
+ * aead_request_* API calls in a similar way as AEAD handle to the
+ * crypto_aead_* API calls.
+ */
+
+/**
+ * crypto_aead_reqsize() - obtain size of the request data structure
+ * @tfm: cipher handle
+ *
+ * Return: number of bytes
+ */
+unsigned int crypto_aead_reqsize(struct crypto_aead *tfm);
+
+/**
+ * aead_request_set_tfm() - update cipher handle reference in request
+ * @req: request handle to be modified
+ * @tfm: cipher handle that shall be added to the request handle
+ *
+ * Allow the caller to replace the existing aead handle in the request
+ * data structure with a different one.
+ */
+static inline void aead_request_set_tfm(struct aead_request *req,
+					struct crypto_aead *tfm)
+{
+	req->base.tfm = crypto_aead_tfm(tfm->child);
+}
+
+/**
+ * aead_request_alloc() - allocate request data structure
+ * @tfm: cipher handle to be registered with the request
+ * @gfp: memory allocation flag that is handed to kmalloc by the API call.
+ *
+ * Allocate the request data structure that must be used with the AEAD
+ * encrypt and decrypt API calls. During the allocation, the provided aead
+ * handle is registered in the request data structure.
+ *
+ * Return: allocated request handle in case of success; IS_ERR() is true in case
+ *	   of an error, PTR_ERR() returns the error code.
+ */
+static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm,
+						      gfp_t gfp)
+{
+	struct aead_request *req;
+
+	req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp);
+
+	if (likely(req))
+		aead_request_set_tfm(req, tfm);
+
+	return req;
+}
+
+/**
+ * aead_request_free() - zeroize and free request data structure
+ * @req: request data structure cipher handle to be freed
+ */
+static inline void aead_request_free(struct aead_request *req)
+{
+	kzfree(req);
+}
+
+/**
+ * aead_request_set_callback() - set asynchronous callback function
+ * @req: request handle
+ * @flags: specify zero or an ORing of the flags
+ *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
+ *	   increase the wait queue beyond the initial maximum size;
+ *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
+ * @compl: callback function pointer to be registered with the request handle
+ * @data: The data pointer refers to memory that is not used by the kernel
+ *	  crypto API, but provided to the callback function for it to use. Here,
+ *	  the caller can provide a reference to memory the callback function can
+ *	  operate on. As the callback function is invoked asynchronously to the
+ *	  related functionality, it may need to access data structures of the
+ *	  related functionality which can be referenced using this pointer. The
+ *	  callback function can access the memory via the "data" field in the
+ *	  crypto_async_request data structure provided to the callback function.
+ *
+ * Setting the callback function that is triggered once the cipher operation
+ * completes
+ *
+ * The callback function is registered with the aead_request handle and
+ * must comply with the following template
+ *
+ *	void callback_function(struct crypto_async_request *req, int error)
+ */
+static inline void aead_request_set_callback(struct aead_request *req,
+					     u32 flags,
+					     crypto_completion_t compl,
+					     void *data)
+{
+	req->base.complete = compl;
+	req->base.data = data;
+	req->base.flags = flags;
+}
+
+/**
+ * aead_request_set_crypt - set data buffers
+ * @req: request handle
+ * @src: source scatter / gather list
+ * @dst: destination scatter / gather list
+ * @cryptlen: number of bytes to process from @src
+ * @iv: IV for the cipher operation which must comply with the IV size defined
+ *      by crypto_aead_ivsize()
+ *
+ * Setting the source data and destination data scatter / gather lists.
+ *
+ * For encryption, the source is treated as the plaintext and the
+ * destination is the ciphertext. For a decryption operation, the use is
+ * reversed - the source is the ciphertext and the destination is the plaintext.
+ *
+ * For both src/dst the layout is associated data, skipped data,
+ * plain/cipher text, authentication tag.
+ *
+ * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption,
+ *		  the caller must concatenate the ciphertext followed by the
+ *		  authentication tag and provide the entire data stream to the
+ *		  decryption operation (i.e. the data length used for the
+ *		  initialization of the scatterlist and the data length for the
+ *		  decryption operation is identical). For encryption, however,
+ *		  the authentication tag is created while encrypting the data.
+ *		  The destination buffer must hold sufficient space for the
+ *		  ciphertext and the authentication tag while the encryption
+ *		  invocation must only point to the plaintext data size. The
+ *		  following code snippet illustrates the memory usage
+ *		  buffer = kmalloc(ptbuflen + (enc ? authsize : 0));
+ *		  sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0));
+ *		  aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv);
+ */
+static inline void aead_request_set_crypt(struct aead_request *req,
+					  struct scatterlist *src,
+					  struct scatterlist *dst,
+					  unsigned int cryptlen, u8 *iv)
+{
+	req->src = src;
+	req->dst = dst;
+	req->cryptlen = cryptlen;
+	req->iv = iv;
+}
+
+/**
+ * aead_request_set_assoc() - set the associated data scatter / gather list
+ * @req: request handle
+ * @assoc: associated data scatter / gather list
+ * @assoclen: number of bytes to process from @assoc
+ *
+ * Obsolete, do not use.
+ */
+static inline void aead_request_set_assoc(struct aead_request *req,
+					  struct scatterlist *assoc,
+					  unsigned int assoclen)
+{
+	req->assoc = assoc;
+	req->assoclen = assoclen;
+	req->old = true;
+}
+
+/**
+ * aead_request_set_ad - set associated data information
+ * @req: request handle
+ * @assoclen: number of bytes in associated data
+ *
+ * Setting the AD information.  This function sets the length of
+ * the associated data and the number of bytes to skip after it to
+ * access the plain/cipher text.
+ */
+static inline void aead_request_set_ad(struct aead_request *req,
+				       unsigned int assoclen)
+{
+	req->assoclen = assoclen;
+	req->old = false;
+}
+
 static inline struct crypto_aead *aead_givcrypt_reqtfm(
 	struct aead_givcrypt_request *req)
 {
@@ -38,14 +543,12 @@ static inline struct crypto_aead *aead_givcrypt_reqtfm(
 
 static inline int crypto_aead_givencrypt(struct aead_givcrypt_request *req)
 {
-	struct aead_tfm *crt = crypto_aead_crt(aead_givcrypt_reqtfm(req));
-	return crt->givencrypt(req);
+	return aead_givcrypt_reqtfm(req)->givencrypt(req);
 };
 
 static inline int crypto_aead_givdecrypt(struct aead_givcrypt_request *req)
 {
-	struct aead_tfm *crt = crypto_aead_crt(aead_givcrypt_reqtfm(req));
-	return crt->givdecrypt(req);
+	return aead_givcrypt_reqtfm(req)->givdecrypt(req);
 };
 
 static inline void aead_givcrypt_set_tfm(struct aead_givcrypt_request *req,
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 0ecb768..d4ebf6e 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+struct crypto_aead;
 struct module;
 struct rtattr;
 struct seq_file;
@@ -126,7 +127,6 @@ struct ablkcipher_walk {
 };
 
 extern const struct crypto_type crypto_ablkcipher_type;
-extern const struct crypto_type crypto_aead_type;
 extern const struct crypto_type crypto_blkcipher_type;
 
 void crypto_mod_put(struct crypto_alg *alg);
@@ -144,6 +144,8 @@ int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg,
 int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg,
 		       struct crypto_instance *inst,
 		       const struct crypto_type *frontend);
+int crypto_grab_spawn(struct crypto_spawn *spawn, const char *name,
+		      u32 type, u32 mask);
 
 void crypto_drop_spawn(struct crypto_spawn *spawn);
 struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type,
@@ -239,22 +241,6 @@ static inline void *crypto_ablkcipher_ctx_aligned(struct crypto_ablkcipher *tfm)
 	return crypto_tfm_ctx_aligned(&tfm->base);
 }
 
-static inline struct aead_alg *crypto_aead_alg(struct crypto_aead *tfm)
-{
-	return &crypto_aead_tfm(tfm)->__crt_alg->cra_aead;
-}
-
-static inline void *crypto_aead_ctx(struct crypto_aead *tfm)
-{
-	return crypto_tfm_ctx(&tfm->base);
-}
-
-static inline struct crypto_instance *crypto_aead_alg_instance(
-	struct crypto_aead *aead)
-{
-	return crypto_tfm_alg_instance(&aead->base);
-}
-
 static inline struct crypto_blkcipher *crypto_spawn_blkcipher(
 	struct crypto_spawn *spawn)
 {
@@ -363,21 +349,6 @@ static inline int ablkcipher_tfm_in_queue(struct crypto_queue *queue,
 	return crypto_tfm_in_queue(queue, crypto_ablkcipher_tfm(tfm));
 }
 
-static inline void *aead_request_ctx(struct aead_request *req)
-{
-	return req->__ctx;
-}
-
-static inline void aead_request_complete(struct aead_request *req, int err)
-{
-	req->base.complete(&req->base, err);
-}
-
-static inline u32 aead_request_flags(struct aead_request *req)
-{
-	return req->base.flags;
-}
-
 static inline struct crypto_alg *crypto_get_attr_alg(struct rtattr **tb,
 						     u32 type, u32 mask)
 {
diff --git a/include/crypto/compress.h b/include/crypto/compress.h
index 86163ef..5b67af8 100644
--- a/include/crypto/compress.h
+++ b/include/crypto/compress.h
@@ -55,14 +55,14 @@ struct crypto_pcomp {
 };
 
 struct pcomp_alg {
-	int (*compress_setup)(struct crypto_pcomp *tfm, void *params,
+	int (*compress_setup)(struct crypto_pcomp *tfm, const void *params,
 			      unsigned int len);
 	int (*compress_init)(struct crypto_pcomp *tfm);
 	int (*compress_update)(struct crypto_pcomp *tfm,
 			       struct comp_request *req);
 	int (*compress_final)(struct crypto_pcomp *tfm,
 			      struct comp_request *req);
-	int (*decompress_setup)(struct crypto_pcomp *tfm, void *params,
+	int (*decompress_setup)(struct crypto_pcomp *tfm, const void *params,
 				unsigned int len);
 	int (*decompress_init)(struct crypto_pcomp *tfm);
 	int (*decompress_update)(struct crypto_pcomp *tfm,
@@ -97,7 +97,7 @@ static inline struct pcomp_alg *crypto_pcomp_alg(struct crypto_pcomp *tfm)
 }
 
 static inline int crypto_compress_setup(struct crypto_pcomp *tfm,
-					void *params, unsigned int len)
+					const void *params, unsigned int len)
 {
 	return crypto_pcomp_alg(tfm)->compress_setup(tfm, params, len);
 }
@@ -120,7 +120,7 @@ static inline int crypto_compress_final(struct crypto_pcomp *tfm,
 }
 
 static inline int crypto_decompress_setup(struct crypto_pcomp *tfm,
-					  void *params, unsigned int len)
+					  const void *params, unsigned int len)
 {
 	return crypto_pcomp_alg(tfm)->decompress_setup(tfm, params, len);
 }
diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h
index ba98918..1547f54 100644
--- a/include/crypto/cryptd.h
+++ b/include/crypto/cryptd.h
@@ -14,6 +14,7 @@
 
 #include <linux/crypto.h>
 #include <linux/kernel.h>
+#include <crypto/aead.h>
 #include <crypto/hash.h>
 
 struct cryptd_ablkcipher {
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 5186f75..c3f208d 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -49,8 +49,9 @@
 #include <crypto/internal/rng.h>
 #include <crypto/rng.h>
 #include <linux/fips.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 /*
  * Concatenation Helper and string operation helper
@@ -104,7 +105,7 @@ struct drbg_test_data {
 };
 
 struct drbg_state {
-	spinlock_t drbg_lock;	/* lock around DRBG */
+	struct mutex drbg_mutex;	/* lock around DRBG */
 	unsigned char *V;	/* internal state 10.1.1.1 1a) */
 	/* hash: static value 10.1.1.1 1b) hmac / ctr: key */
 	unsigned char *C;
@@ -119,9 +120,13 @@ struct drbg_state {
 	bool fips_primed;	/* Continuous test primed? */
 	unsigned char *prev;	/* FIPS 140-2 continuous test value */
 #endif
+	struct work_struct seed_work;	/* asynchronous seeding support */
+	u8 *seed_buf;			/* buffer holding the seed */
+	size_t seed_buf_len;
+	struct crypto_rng *jent;
 	const struct drbg_state_ops *d_ops;
 	const struct drbg_core *core;
-	struct drbg_test_data *test_data;
+	struct drbg_string test_data;
 };
 
 static inline __u8 drbg_statelen(struct drbg_state *drbg)
@@ -177,19 +182,8 @@ static inline size_t drbg_max_requests(struct drbg_state *drbg)
 }
 
 /*
- * kernel crypto API input data structure for DRBG generate in case dlen
- * is set to 0
- */
-struct drbg_gen {
-	unsigned char *outbuf;	/* output buffer for random numbers */
-	unsigned int outlen;	/* size of output buffer */
-	struct drbg_string *addtl;	/* additional information string */
-	struct drbg_test_data *test_data;	/* test data */
-};
-
-/*
  * This is a wrapper to the kernel crypto API function of
- * crypto_rng_get_bytes() to allow the caller to provide additional data.
+ * crypto_rng_generate() to allow the caller to provide additional data.
  *
  * @drng DRBG handle -- see crypto_rng_get_bytes
  * @outbuf output buffer -- see crypto_rng_get_bytes
@@ -204,21 +198,15 @@ static inline int crypto_drbg_get_bytes_addtl(struct crypto_rng *drng,
 			unsigned char *outbuf, unsigned int outlen,
 			struct drbg_string *addtl)
 {
-	int ret;
-	struct drbg_gen genbuf;
-	genbuf.outbuf = outbuf;
-	genbuf.outlen = outlen;
-	genbuf.addtl = addtl;
-	genbuf.test_data = NULL;
-	ret = crypto_rng_get_bytes(drng, (u8 *)&genbuf, 0);
-	return ret;
+	return crypto_rng_generate(drng, addtl->buf, addtl->len,
+				   outbuf, outlen);
 }
 
 /*
  * TEST code
  *
  * This is a wrapper to the kernel crypto API function of
- * crypto_rng_get_bytes() to allow the caller to provide additional data and
+ * crypto_rng_generate() to allow the caller to provide additional data and
  * allow furnishing of test_data
  *
  * @drng DRBG handle -- see crypto_rng_get_bytes
@@ -236,14 +224,10 @@ static inline int crypto_drbg_get_bytes_addtl_test(struct crypto_rng *drng,
 			struct drbg_string *addtl,
 			struct drbg_test_data *test_data)
 {
-	int ret;
-	struct drbg_gen genbuf;
-	genbuf.outbuf = outbuf;
-	genbuf.outlen = outlen;
-	genbuf.addtl = addtl;
-	genbuf.test_data = test_data;
-	ret = crypto_rng_get_bytes(drng, (u8 *)&genbuf, 0);
-	return ret;
+	crypto_rng_set_entropy(drng, test_data->testentropy->buf,
+			       test_data->testentropy->len);
+	return crypto_rng_generate(drng, addtl->buf, addtl->len,
+				   outbuf, outlen);
 }
 
 /*
@@ -264,14 +248,9 @@ static inline int crypto_drbg_reset_test(struct crypto_rng *drng,
 					 struct drbg_string *pers,
 					 struct drbg_test_data *test_data)
 {
-	int ret;
-	struct drbg_gen genbuf;
-	genbuf.outbuf = NULL;
-	genbuf.outlen = 0;
-	genbuf.addtl = pers;
-	genbuf.test_data = test_data;
-	ret = crypto_rng_reset(drng, (u8 *)&genbuf, 0);
-	return ret;
+	crypto_rng_set_entropy(drng, test_data->testentropy->buf,
+			       test_data->testentropy->len);
+	return crypto_rng_reset(drng, pers->buf, pers->len);
 }
 
 /* DRBG type flags */
diff --git a/include/crypto/internal/aead.h b/include/crypto/internal/aead.h
index 2eba340..4137330 100644
--- a/include/crypto/internal/aead.h
+++ b/include/crypto/internal/aead.h
@@ -19,12 +19,59 @@
 
 struct rtattr;
 
+struct aead_instance {
+	struct aead_alg alg;
+};
+
 struct crypto_aead_spawn {
 	struct crypto_spawn base;
 };
 
+extern const struct crypto_type crypto_aead_type;
 extern const struct crypto_type crypto_nivaead_type;
 
+static inline void *crypto_aead_ctx(struct crypto_aead *tfm)
+{
+	return crypto_tfm_ctx(&tfm->base);
+}
+
+static inline struct crypto_instance *crypto_aead_alg_instance(
+	struct crypto_aead *aead)
+{
+	return crypto_tfm_alg_instance(&aead->base);
+}
+
+static inline struct crypto_instance *aead_crypto_instance(
+	struct aead_instance *inst)
+{
+	return container_of(&inst->alg.base, struct crypto_instance, alg);
+}
+
+static inline struct aead_instance *aead_instance(struct crypto_instance *inst)
+{
+	return container_of(&inst->alg, struct aead_instance, alg.base);
+}
+
+static inline void *aead_instance_ctx(struct aead_instance *inst)
+{
+	return crypto_instance_ctx(aead_crypto_instance(inst));
+}
+
+static inline void *aead_request_ctx(struct aead_request *req)
+{
+	return req->__ctx;
+}
+
+static inline void aead_request_complete(struct aead_request *req, int err)
+{
+	req->base.complete(&req->base, err);
+}
+
+static inline u32 aead_request_flags(struct aead_request *req)
+{
+	return req->base.flags;
+}
+
 static inline void crypto_set_aead_spawn(
 	struct crypto_aead_spawn *spawn, struct crypto_instance *inst)
 {
@@ -47,24 +94,27 @@ static inline struct crypto_alg *crypto_aead_spawn_alg(
 	return spawn->base.alg;
 }
 
+static inline struct aead_alg *crypto_spawn_aead_alg(
+	struct crypto_aead_spawn *spawn)
+{
+	return container_of(spawn->base.alg, struct aead_alg, base);
+}
+
 static inline struct crypto_aead *crypto_spawn_aead(
 	struct crypto_aead_spawn *spawn)
 {
-	return __crypto_aead_cast(
-		crypto_spawn_tfm(&spawn->base, CRYPTO_ALG_TYPE_AEAD,
-				 CRYPTO_ALG_TYPE_MASK));
+	return crypto_spawn_tfm2(&spawn->base);
 }
 
-struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
-					 struct rtattr **tb, u32 type,
-					 u32 mask);
-void aead_geniv_free(struct crypto_instance *inst);
+struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
+				       struct rtattr **tb, u32 type, u32 mask);
+void aead_geniv_free(struct aead_instance *inst);
 int aead_geniv_init(struct crypto_tfm *tfm);
 void aead_geniv_exit(struct crypto_tfm *tfm);
 
 static inline struct crypto_aead *aead_geniv_base(struct crypto_aead *geniv)
 {
-	return crypto_aead_crt(geniv)->base;
+	return geniv->child;
 }
 
 static inline void *aead_givcrypt_reqctx(struct aead_givcrypt_request *req)
@@ -78,5 +128,27 @@ static inline void aead_givcrypt_complete(struct aead_givcrypt_request *req,
 	aead_request_complete(&req->areq, err);
 }
 
+static inline void crypto_aead_set_reqsize(struct crypto_aead *aead,
+					   unsigned int reqsize)
+{
+	crypto_aead_crt(aead)->reqsize = reqsize;
+}
+
+static inline unsigned int crypto_aead_alg_maxauthsize(struct aead_alg *alg)
+{
+	return alg->base.cra_aead.encrypt ? alg->base.cra_aead.maxauthsize :
+					    alg->maxauthsize;
+}
+
+static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead)
+{
+	return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead));
+}
+
+int crypto_register_aead(struct aead_alg *alg);
+int crypto_unregister_aead(struct aead_alg *alg);
+int aead_register_instance(struct crypto_template *tmpl,
+			   struct aead_instance *inst);
+
 #endif	/* _CRYPTO_INTERNAL_AEAD_H */
 
diff --git a/include/crypto/internal/rng.h b/include/crypto/internal/rng.h
index 8969733..263f1a5 100644
--- a/include/crypto/internal/rng.h
+++ b/include/crypto/internal/rng.h
@@ -2,6 +2,7 @@
  * RNG: Random Number Generator  algorithms under the crypto API
  *
  * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com>
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -16,11 +17,20 @@
 #include <crypto/algapi.h>
 #include <crypto/rng.h>
 
-extern const struct crypto_type crypto_rng_type;
+int crypto_register_rng(struct rng_alg *alg);
+void crypto_unregister_rng(struct rng_alg *alg);
+int crypto_register_rngs(struct rng_alg *algs, int count);
+void crypto_unregister_rngs(struct rng_alg *algs, int count);
 
 static inline void *crypto_rng_ctx(struct crypto_rng *tfm)
 {
 	return crypto_tfm_ctx(&tfm->base);
 }
 
+static inline void crypto_rng_set_entropy(struct crypto_rng *tfm,
+					  const u8 *data, unsigned int len)
+{
+	crypto_rng_alg(tfm)->set_ent(tfm, data, len);
+}
+
 #endif
diff --git a/include/crypto/md5.h b/include/crypto/md5.h
index 65f299b..146af825 100644
--- a/include/crypto/md5.h
+++ b/include/crypto/md5.h
@@ -8,6 +8,11 @@
 #define MD5_BLOCK_WORDS		16
 #define MD5_HASH_WORDS		4
 
+#define MD5_H0	0x67452301UL
+#define MD5_H1	0xefcdab89UL
+#define MD5_H2	0x98badcfeUL
+#define MD5_H3	0x10325476UL
+
 struct md5_state {
 	u32 hash[MD5_HASH_WORDS];
 	u32 block[MD5_BLOCK_WORDS];
diff --git a/include/crypto/null.h b/include/crypto/null.h
index b7c864c..06dc30d 100644
--- a/include/crypto/null.h
+++ b/include/crypto/null.h
@@ -8,4 +8,7 @@
 #define NULL_DIGEST_SIZE	0
 #define NULL_IV_SIZE		0
 
+struct crypto_blkcipher *crypto_get_default_null_skcipher(void);
+void crypto_put_default_null_skcipher(void);
+
 #endif
diff --git a/include/crypto/rng.h b/include/crypto/rng.h
index 6e28ea5..c5d4684 100644
--- a/include/crypto/rng.h
+++ b/include/crypto/rng.h
@@ -2,6 +2,7 @@
  * RNG: Random Number Generator  algorithms under the crypto API
  *
  * Copyright (c) 2008 Neil Horman <nhorman@tuxdriver.com>
+ * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -15,6 +16,50 @@
 
 #include <linux/crypto.h>
 
+struct crypto_rng;
+
+/**
+ * struct rng_alg - random number generator definition
+ *
+ * @generate:	The function defined by this variable obtains a
+ *		random number. The random number generator transform
+ *		must generate the random number out of the context
+ *		provided with this call, plus any additional data
+ *		if provided to the call.
+ * @seed:	Seed or reseed the random number generator.  With the
+ *		invocation of this function call, the random number
+ *		generator shall become ready fo generation.  If the
+ *		random number generator requires a seed for setting
+ *		up a new state, the seed must be provided by the
+ *		consumer while invoking this function. The required
+ *		size of the seed is defined with @seedsize .
+ * @set_ent:	Set entropy that would otherwise be obtained from
+ *		entropy source.  Internal use only.
+ * @seedsize:	The seed size required for a random number generator
+ *		initialization defined with this variable. Some
+ *		random number generators does not require a seed
+ *		as the seeding is implemented internally without
+ *		the need of support by the consumer. In this case,
+ *		the seed size is set to zero.
+ * @base:	Common crypto API algorithm data structure.
+ */
+struct rng_alg {
+	int (*generate)(struct crypto_rng *tfm,
+			const u8 *src, unsigned int slen,
+			u8 *dst, unsigned int dlen);
+	int (*seed)(struct crypto_rng *tfm, const u8 *seed, unsigned int slen);
+	void (*set_ent)(struct crypto_rng *tfm, const u8 *data,
+			unsigned int len);
+
+	unsigned int seedsize;
+
+	struct crypto_alg base;
+};
+
+struct crypto_rng {
+	struct crypto_tfm base;
+};
+
 extern struct crypto_rng *crypto_default_rng;
 
 int crypto_get_default_rng(void);
@@ -27,11 +72,6 @@ void crypto_put_default_rng(void);
  * CRYPTO_ALG_TYPE_RNG (listed as type "rng" in /proc/crypto)
  */
 
-static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
-{
-	return (struct crypto_rng *)tfm;
-}
-
 /**
  * crypto_alloc_rng() -- allocate RNG handle
  * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
@@ -52,15 +92,7 @@ static inline struct crypto_rng *__crypto_rng_cast(struct crypto_tfm *tfm)
  * Return: allocated cipher handle in case of success; IS_ERR() is true in case
  *	   of an error, PTR_ERR() returns the error code.
  */
-static inline struct crypto_rng *crypto_alloc_rng(const char *alg_name,
-						  u32 type, u32 mask)
-{
-	type &= ~CRYPTO_ALG_TYPE_MASK;
-	type |= CRYPTO_ALG_TYPE_RNG;
-	mask |= CRYPTO_ALG_TYPE_MASK;
-
-	return __crypto_rng_cast(crypto_alloc_base(alg_name, type, mask));
-}
+struct crypto_rng *crypto_alloc_rng(const char *alg_name, u32 type, u32 mask);
 
 static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm)
 {
@@ -77,12 +109,8 @@ static inline struct crypto_tfm *crypto_rng_tfm(struct crypto_rng *tfm)
  */
 static inline struct rng_alg *crypto_rng_alg(struct crypto_rng *tfm)
 {
-	return &crypto_rng_tfm(tfm)->__crt_alg->cra_rng;
-}
-
-static inline struct rng_tfm *crypto_rng_crt(struct crypto_rng *tfm)
-{
-	return &crypto_rng_tfm(tfm)->crt_rng;
+	return container_of(crypto_rng_tfm(tfm)->__crt_alg,
+			    struct rng_alg, base);
 }
 
 /**
@@ -91,7 +119,28 @@ static inline struct rng_tfm *crypto_rng_crt(struct crypto_rng *tfm)
  */
 static inline void crypto_free_rng(struct crypto_rng *tfm)
 {
-	crypto_free_tfm(crypto_rng_tfm(tfm));
+	crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm));
+}
+
+/**
+ * crypto_rng_generate() - get random number
+ * @tfm: cipher handle
+ * @src: Input buffer holding additional data, may be NULL
+ * @slen: Length of additional data
+ * @dst: output buffer holding the random numbers
+ * @dlen: length of the output buffer
+ *
+ * This function fills the caller-allocated buffer with random
+ * numbers using the random number generator referenced by the
+ * cipher handle.
+ *
+ * Return: 0 function was successful; < 0 if an error occurred
+ */
+static inline int crypto_rng_generate(struct crypto_rng *tfm,
+				      const u8 *src, unsigned int slen,
+				      u8 *dst, unsigned int dlen)
+{
+	return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen);
 }
 
 /**
@@ -108,7 +157,7 @@ static inline void crypto_free_rng(struct crypto_rng *tfm)
 static inline int crypto_rng_get_bytes(struct crypto_rng *tfm,
 				       u8 *rdata, unsigned int dlen)
 {
-	return crypto_rng_crt(tfm)->rng_gen_random(tfm, rdata, dlen);
+	return crypto_rng_generate(tfm, NULL, 0, rdata, dlen);
 }
 
 /**
@@ -128,11 +177,8 @@ static inline int crypto_rng_get_bytes(struct crypto_rng *tfm,
  *
  * Return: 0 if the setting of the key was successful; < 0 if an error occurred
  */
-static inline int crypto_rng_reset(struct crypto_rng *tfm,
-				   u8 *seed, unsigned int slen)
-{
-	return crypto_rng_crt(tfm)->rng_reset(tfm, seed, slen);
-}
+int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed,
+		     unsigned int slen);
 
 /**
  * crypto_rng_seedsize() - obtain seed size of RNG
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 20e4226..96670e7 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -102,4 +102,8 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 
 int scatterwalk_bytes_sglen(struct scatterlist *sg, int num_bytes);
 
+struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
+				     struct scatterlist *src,
+				     unsigned int len);
+
 #endif  /* _CRYPTO_SCATTERWALK_H */
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 10df5d2..7d290a9 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -138,9 +138,9 @@ struct crypto_async_request;
 struct crypto_aead;
 struct crypto_blkcipher;
 struct crypto_hash;
-struct crypto_rng;
 struct crypto_tfm;
 struct crypto_type;
+struct aead_request;
 struct aead_givcrypt_request;
 struct skcipher_givcrypt_request;
 
@@ -175,32 +175,6 @@ struct ablkcipher_request {
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
-/**
- *	struct aead_request - AEAD request
- *	@base: Common attributes for async crypto requests
- *	@assoclen: Length in bytes of associated data for authentication
- *	@cryptlen: Length of data to be encrypted or decrypted
- *	@iv: Initialisation vector
- *	@assoc: Associated data
- *	@src: Source data
- *	@dst: Destination data
- *	@__ctx: Start of private context data
- */
-struct aead_request {
-	struct crypto_async_request base;
-
-	unsigned int assoclen;
-	unsigned int cryptlen;
-
-	u8 *iv;
-
-	struct scatterlist *assoc;
-	struct scatterlist *src;
-	struct scatterlist *dst;
-
-	void *__ctx[] CRYPTO_MINALIGN_ATTR;
-};
-
 struct blkcipher_desc {
 	struct crypto_blkcipher *tfm;
 	void *info;
@@ -294,7 +268,7 @@ struct ablkcipher_alg {
 };
 
 /**
- * struct aead_alg - AEAD cipher definition
+ * struct old_aead_alg - AEAD cipher definition
  * @maxauthsize: Set the maximum authentication tag size supported by the
  *		 transformation. A transformation may support smaller tag sizes.
  *		 As the authentication tag is a message digest to ensure the
@@ -319,7 +293,7 @@ struct ablkcipher_alg {
  * All fields except @givencrypt , @givdecrypt , @geniv and @ivsize are
  * mandatory and must be filled.
  */
-struct aead_alg {
+struct old_aead_alg {
 	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
 	              unsigned int keylen);
 	int (*setauthsize)(struct crypto_aead *tfm, unsigned int authsize);
@@ -426,40 +400,12 @@ struct compress_alg {
 			      unsigned int slen, u8 *dst, unsigned int *dlen);
 };
 
-/**
- * struct rng_alg - random number generator definition
- * @rng_make_random: The function defined by this variable obtains a random
- *		     number. The random number generator transform must generate
- *		     the random number out of the context provided with this
- *		     call.
- * @rng_reset: Reset of the random number generator by clearing the entire state.
- *	       With the invocation of this function call, the random number
- *             generator shall completely reinitialize its state. If the random
- *	       number generator requires a seed for setting up a new state,
- *	       the seed must be provided by the consumer while invoking this
- *	       function. The required size of the seed is defined with
- *	       @seedsize .
- * @seedsize: The seed size required for a random number generator
- *	      initialization defined with this variable. Some random number
- *	      generators like the SP800-90A DRBG does not require a seed as the
- *	      seeding is implemented internally without the need of support by
- *	      the consumer. In this case, the seed size is set to zero.
- */
-struct rng_alg {
-	int (*rng_make_random)(struct crypto_rng *tfm, u8 *rdata,
-			       unsigned int dlen);
-	int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen);
-
-	unsigned int seedsize;
-};
-
 
 #define cra_ablkcipher	cra_u.ablkcipher
 #define cra_aead	cra_u.aead
 #define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
 #define cra_compress	cra_u.compress
-#define cra_rng		cra_u.rng
 
 /**
  * struct crypto_alg - definition of a cryptograpic cipher algorithm
@@ -555,11 +501,10 @@ struct crypto_alg {
 
 	union {
 		struct ablkcipher_alg ablkcipher;
-		struct aead_alg aead;
+		struct old_aead_alg aead;
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
 		struct compress_alg compress;
-		struct rng_alg rng;
 	} cra_u;
 
 	int (*cra_init)(struct crypto_tfm *tfm);
@@ -602,21 +547,6 @@ struct ablkcipher_tfm {
 	unsigned int reqsize;
 };
 
-struct aead_tfm {
-	int (*setkey)(struct crypto_aead *tfm, const u8 *key,
-	              unsigned int keylen);
-	int (*encrypt)(struct aead_request *req);
-	int (*decrypt)(struct aead_request *req);
-	int (*givencrypt)(struct aead_givcrypt_request *req);
-	int (*givdecrypt)(struct aead_givcrypt_request *req);
-
-	struct crypto_aead *base;
-
-	unsigned int ivsize;
-	unsigned int authsize;
-	unsigned int reqsize;
-};
-
 struct blkcipher_tfm {
 	void *iv;
 	int (*setkey)(struct crypto_tfm *tfm, const u8 *key,
@@ -655,19 +585,11 @@ struct compress_tfm {
 	                      u8 *dst, unsigned int *dlen);
 };
 
-struct rng_tfm {
-	int (*rng_gen_random)(struct crypto_rng *tfm, u8 *rdata,
-			      unsigned int dlen);
-	int (*rng_reset)(struct crypto_rng *tfm, u8 *seed, unsigned int slen);
-};
-
 #define crt_ablkcipher	crt_u.ablkcipher
-#define crt_aead	crt_u.aead
 #define crt_blkcipher	crt_u.blkcipher
 #define crt_cipher	crt_u.cipher
 #define crt_hash	crt_u.hash
 #define crt_compress	crt_u.compress
-#define crt_rng		crt_u.rng
 
 struct crypto_tfm {
 
@@ -675,12 +597,10 @@ struct crypto_tfm {
 	
 	union {
 		struct ablkcipher_tfm ablkcipher;
-		struct aead_tfm aead;
 		struct blkcipher_tfm blkcipher;
 		struct cipher_tfm cipher;
 		struct hash_tfm hash;
 		struct compress_tfm compress;
-		struct rng_tfm rng;
 	} crt_u;
 
 	void (*exit)(struct crypto_tfm *tfm);
@@ -694,10 +614,6 @@ struct crypto_ablkcipher {
 	struct crypto_tfm base;
 };
 
-struct crypto_aead {
-	struct crypto_tfm base;
-};
-
 struct crypto_blkcipher {
 	struct crypto_tfm base;
 };
@@ -714,10 +630,6 @@ struct crypto_hash {
 	struct crypto_tfm base;
 };
 
-struct crypto_rng {
-	struct crypto_tfm base;
-};
-
 enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
@@ -1194,400 +1106,6 @@ static inline void ablkcipher_request_set_crypt(
 }
 
 /**
- * DOC: Authenticated Encryption With Associated Data (AEAD) Cipher API
- *
- * The AEAD cipher API is used with the ciphers of type CRYPTO_ALG_TYPE_AEAD
- * (listed as type "aead" in /proc/crypto)
- *
- * The most prominent examples for this type of encryption is GCM and CCM.
- * However, the kernel supports other types of AEAD ciphers which are defined
- * with the following cipher string:
- *
- *	authenc(keyed message digest, block cipher)
- *
- * For example: authenc(hmac(sha256), cbc(aes))
- *
- * The example code provided for the asynchronous block cipher operation
- * applies here as well. Naturally all *ablkcipher* symbols must be exchanged
- * the *aead* pendants discussed in the following. In addtion, for the AEAD
- * operation, the aead_request_set_assoc function must be used to set the
- * pointer to the associated data memory location before performing the
- * encryption or decryption operation. In case of an encryption, the associated
- * data memory is filled during the encryption operation. For decryption, the
- * associated data memory must contain data that is used to verify the integrity
- * of the decrypted data. Another deviation from the asynchronous block cipher
- * operation is that the caller should explicitly check for -EBADMSG of the
- * crypto_aead_decrypt. That error indicates an authentication error, i.e.
- * a breach in the integrity of the message. In essence, that -EBADMSG error
- * code is the key bonus an AEAD cipher has over "standard" block chaining
- * modes.
- */
-
-static inline struct crypto_aead *__crypto_aead_cast(struct crypto_tfm *tfm)
-{
-	return (struct crypto_aead *)tfm;
-}
-
-/**
- * crypto_alloc_aead() - allocate AEAD cipher handle
- * @alg_name: is the cra_name / name or cra_driver_name / driver name of the
- *	     AEAD cipher
- * @type: specifies the type of the cipher
- * @mask: specifies the mask for the cipher
- *
- * Allocate a cipher handle for an AEAD. The returned struct
- * crypto_aead is the cipher handle that is required for any subsequent
- * API invocation for that AEAD.
- *
- * Return: allocated cipher handle in case of success; IS_ERR() is true in case
- *	   of an error, PTR_ERR() returns the error code.
- */
-struct crypto_aead *crypto_alloc_aead(const char *alg_name, u32 type, u32 mask);
-
-static inline struct crypto_tfm *crypto_aead_tfm(struct crypto_aead *tfm)
-{
-	return &tfm->base;
-}
-
-/**
- * crypto_free_aead() - zeroize and free aead handle
- * @tfm: cipher handle to be freed
- */
-static inline void crypto_free_aead(struct crypto_aead *tfm)
-{
-	crypto_free_tfm(crypto_aead_tfm(tfm));
-}
-
-static inline struct aead_tfm *crypto_aead_crt(struct crypto_aead *tfm)
-{
-	return &crypto_aead_tfm(tfm)->crt_aead;
-}
-
-/**
- * crypto_aead_ivsize() - obtain IV size
- * @tfm: cipher handle
- *
- * The size of the IV for the aead referenced by the cipher handle is
- * returned. This IV size may be zero if the cipher does not need an IV.
- *
- * Return: IV size in bytes
- */
-static inline unsigned int crypto_aead_ivsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->ivsize;
-}
-
-/**
- * crypto_aead_authsize() - obtain maximum authentication data size
- * @tfm: cipher handle
- *
- * The maximum size of the authentication data for the AEAD cipher referenced
- * by the AEAD cipher handle is returned. The authentication data size may be
- * zero if the cipher implements a hard-coded maximum.
- *
- * The authentication data may also be known as "tag value".
- *
- * Return: authentication data size / tag size in bytes
- */
-static inline unsigned int crypto_aead_authsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->authsize;
-}
-
-/**
- * crypto_aead_blocksize() - obtain block size of cipher
- * @tfm: cipher handle
- *
- * The block size for the AEAD referenced with the cipher handle is returned.
- * The caller may use that information to allocate appropriate memory for the
- * data returned by the encryption or decryption operation
- *
- * Return: block size of cipher
- */
-static inline unsigned int crypto_aead_blocksize(struct crypto_aead *tfm)
-{
-	return crypto_tfm_alg_blocksize(crypto_aead_tfm(tfm));
-}
-
-static inline unsigned int crypto_aead_alignmask(struct crypto_aead *tfm)
-{
-	return crypto_tfm_alg_alignmask(crypto_aead_tfm(tfm));
-}
-
-static inline u32 crypto_aead_get_flags(struct crypto_aead *tfm)
-{
-	return crypto_tfm_get_flags(crypto_aead_tfm(tfm));
-}
-
-static inline void crypto_aead_set_flags(struct crypto_aead *tfm, u32 flags)
-{
-	crypto_tfm_set_flags(crypto_aead_tfm(tfm), flags);
-}
-
-static inline void crypto_aead_clear_flags(struct crypto_aead *tfm, u32 flags)
-{
-	crypto_tfm_clear_flags(crypto_aead_tfm(tfm), flags);
-}
-
-/**
- * crypto_aead_setkey() - set key for cipher
- * @tfm: cipher handle
- * @key: buffer holding the key
- * @keylen: length of the key in bytes
- *
- * The caller provided key is set for the AEAD referenced by the cipher
- * handle.
- *
- * Note, the key length determines the cipher type. Many block ciphers implement
- * different cipher modes depending on the key size, such as AES-128 vs AES-192
- * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128
- * is performed.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
-static inline int crypto_aead_setkey(struct crypto_aead *tfm, const u8 *key,
-				     unsigned int keylen)
-{
-	struct aead_tfm *crt = crypto_aead_crt(tfm);
-
-	return crt->setkey(crt->base, key, keylen);
-}
-
-/**
- * crypto_aead_setauthsize() - set authentication data size
- * @tfm: cipher handle
- * @authsize: size of the authentication data / tag in bytes
- *
- * Set the authentication data size / tag size. AEAD requires an authentication
- * tag (or MAC) in addition to the associated data.
- *
- * Return: 0 if the setting of the key was successful; < 0 if an error occurred
- */
-int crypto_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize);
-
-static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
-{
-	return __crypto_aead_cast(req->base.tfm);
-}
-
-/**
- * crypto_aead_encrypt() - encrypt plaintext
- * @req: reference to the aead_request handle that holds all information
- *	 needed to perform the cipher operation
- *
- * Encrypt plaintext data using the aead_request handle. That data structure
- * and how it is filled with data is discussed with the aead_request_*
- * functions.
- *
- * IMPORTANT NOTE The encryption operation creates the authentication data /
- *		  tag. That data is concatenated with the created ciphertext.
- *		  The ciphertext memory size is therefore the given number of
- *		  block cipher blocks + the size defined by the
- *		  crypto_aead_setauthsize invocation. The caller must ensure
- *		  that sufficient memory is available for the ciphertext and
- *		  the authentication tag.
- *
- * Return: 0 if the cipher operation was successful; < 0 if an error occurred
- */
-static inline int crypto_aead_encrypt(struct aead_request *req)
-{
-	return crypto_aead_crt(crypto_aead_reqtfm(req))->encrypt(req);
-}
-
-/**
- * crypto_aead_decrypt() - decrypt ciphertext
- * @req: reference to the ablkcipher_request handle that holds all information
- *	 needed to perform the cipher operation
- *
- * Decrypt ciphertext data using the aead_request handle. That data structure
- * and how it is filled with data is discussed with the aead_request_*
- * functions.
- *
- * IMPORTANT NOTE The caller must concatenate the ciphertext followed by the
- *		  authentication data / tag. That authentication data / tag
- *		  must have the size defined by the crypto_aead_setauthsize
- *		  invocation.
- *
- *
- * Return: 0 if the cipher operation was successful; -EBADMSG: The AEAD
- *	   cipher operation performs the authentication of the data during the
- *	   decryption operation. Therefore, the function returns this error if
- *	   the authentication of the ciphertext was unsuccessful (i.e. the
- *	   integrity of the ciphertext or the associated data was violated);
- *	   < 0 if an error occurred.
- */
-static inline int crypto_aead_decrypt(struct aead_request *req)
-{
-	if (req->cryptlen < crypto_aead_authsize(crypto_aead_reqtfm(req)))
-		return -EINVAL;
-
-	return crypto_aead_crt(crypto_aead_reqtfm(req))->decrypt(req);
-}
-
-/**
- * DOC: Asynchronous AEAD Request Handle
- *
- * The aead_request data structure contains all pointers to data required for
- * the AEAD cipher operation. This includes the cipher handle (which can be
- * used by multiple aead_request instances), pointer to plaintext and
- * ciphertext, asynchronous callback function, etc. It acts as a handle to the
- * aead_request_* API calls in a similar way as AEAD handle to the
- * crypto_aead_* API calls.
- */
-
-/**
- * crypto_aead_reqsize() - obtain size of the request data structure
- * @tfm: cipher handle
- *
- * Return: number of bytes
- */
-static inline unsigned int crypto_aead_reqsize(struct crypto_aead *tfm)
-{
-	return crypto_aead_crt(tfm)->reqsize;
-}
-
-/**
- * aead_request_set_tfm() - update cipher handle reference in request
- * @req: request handle to be modified
- * @tfm: cipher handle that shall be added to the request handle
- *
- * Allow the caller to replace the existing aead handle in the request
- * data structure with a different one.
- */
-static inline void aead_request_set_tfm(struct aead_request *req,
-					struct crypto_aead *tfm)
-{
-	req->base.tfm = crypto_aead_tfm(crypto_aead_crt(tfm)->base);
-}
-
-/**
- * aead_request_alloc() - allocate request data structure
- * @tfm: cipher handle to be registered with the request
- * @gfp: memory allocation flag that is handed to kmalloc by the API call.
- *
- * Allocate the request data structure that must be used with the AEAD
- * encrypt and decrypt API calls. During the allocation, the provided aead
- * handle is registered in the request data structure.
- *
- * Return: allocated request handle in case of success; IS_ERR() is true in case
- *	   of an error, PTR_ERR() returns the error code.
- */
-static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm,
-						      gfp_t gfp)
-{
-	struct aead_request *req;
-
-	req = kmalloc(sizeof(*req) + crypto_aead_reqsize(tfm), gfp);
-
-	if (likely(req))
-		aead_request_set_tfm(req, tfm);
-
-	return req;
-}
-
-/**
- * aead_request_free() - zeroize and free request data structure
- * @req: request data structure cipher handle to be freed
- */
-static inline void aead_request_free(struct aead_request *req)
-{
-	kzfree(req);
-}
-
-/**
- * aead_request_set_callback() - set asynchronous callback function
- * @req: request handle
- * @flags: specify zero or an ORing of the flags
- *	   CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and
- *	   increase the wait queue beyond the initial maximum size;
- *	   CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep
- * @compl: callback function pointer to be registered with the request handle
- * @data: The data pointer refers to memory that is not used by the kernel
- *	  crypto API, but provided to the callback function for it to use. Here,
- *	  the caller can provide a reference to memory the callback function can
- *	  operate on. As the callback function is invoked asynchronously to the
- *	  related functionality, it may need to access data structures of the
- *	  related functionality which can be referenced using this pointer. The
- *	  callback function can access the memory via the "data" field in the
- *	  crypto_async_request data structure provided to the callback function.
- *
- * Setting the callback function that is triggered once the cipher operation
- * completes
- *
- * The callback function is registered with the aead_request handle and
- * must comply with the following template
- *
- *	void callback_function(struct crypto_async_request *req, int error)
- */
-static inline void aead_request_set_callback(struct aead_request *req,
-					     u32 flags,
-					     crypto_completion_t compl,
-					     void *data)
-{
-	req->base.complete = compl;
-	req->base.data = data;
-	req->base.flags = flags;
-}
-
-/**
- * aead_request_set_crypt - set data buffers
- * @req: request handle
- * @src: source scatter / gather list
- * @dst: destination scatter / gather list
- * @cryptlen: number of bytes to process from @src
- * @iv: IV for the cipher operation which must comply with the IV size defined
- *      by crypto_aead_ivsize()
- *
- * Setting the source data and destination data scatter / gather lists.
- *
- * For encryption, the source is treated as the plaintext and the
- * destination is the ciphertext. For a decryption operation, the use is
- * reversed - the source is the ciphertext and the destination is the plaintext.
- *
- * IMPORTANT NOTE AEAD requires an authentication tag (MAC). For decryption,
- *		  the caller must concatenate the ciphertext followed by the
- *		  authentication tag and provide the entire data stream to the
- *		  decryption operation (i.e. the data length used for the
- *		  initialization of the scatterlist and the data length for the
- *		  decryption operation is identical). For encryption, however,
- *		  the authentication tag is created while encrypting the data.
- *		  The destination buffer must hold sufficient space for the
- *		  ciphertext and the authentication tag while the encryption
- *		  invocation must only point to the plaintext data size. The
- *		  following code snippet illustrates the memory usage
- *		  buffer = kmalloc(ptbuflen + (enc ? authsize : 0));
- *		  sg_init_one(&sg, buffer, ptbuflen + (enc ? authsize : 0));
- *		  aead_request_set_crypt(req, &sg, &sg, ptbuflen, iv);
- */
-static inline void aead_request_set_crypt(struct aead_request *req,
-					  struct scatterlist *src,
-					  struct scatterlist *dst,
-					  unsigned int cryptlen, u8 *iv)
-{
-	req->src = src;
-	req->dst = dst;
-	req->cryptlen = cryptlen;
-	req->iv = iv;
-}
-
-/**
- * aead_request_set_assoc() - set the associated data scatter / gather list
- * @req: request handle
- * @assoc: associated data scatter / gather list
- * @assoclen: number of bytes to process from @assoc
- *
- * For encryption, the memory is filled with the associated data. For
- * decryption, the memory must point to the associated data.
- */
-static inline void aead_request_set_assoc(struct aead_request *req,
-					  struct scatterlist *assoc,
-					  unsigned int assoclen)
-{
-	req->assoc = assoc;
-	req->assoclen = assoclen;
-}
-
-/**
  * DOC: Synchronous Block Cipher API
  *
  * The synchronous block cipher API is used with the ciphers of type
diff --git a/include/linux/module.h b/include/linux/module.h
index c883b86..1e54360 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -655,4 +655,16 @@ static inline void module_bug_finalize(const Elf_Ehdr *hdr,
 static inline void module_bug_cleanup(struct module *mod) {}
 #endif	/* CONFIG_GENERIC_BUG */
 
+#ifdef CONFIG_MODULE_SIG
+static inline bool module_sig_ok(struct module *module)
+{
+	return module->sig_ok;
+}
+#else	/* !CONFIG_MODULE_SIG */
+static inline bool module_sig_ok(struct module *module)
+{
+	return true;
+}
+#endif	/* CONFIG_MODULE_SIG */
+
 #endif /* _LINUX_MODULE_H */
diff --git a/include/linux/nx842.h b/include/linux/nx842.h
index a4d324c..4ddf68d 100644
--- a/include/linux/nx842.h
+++ b/include/linux/nx842.h
@@ -1,11 +1,24 @@
 #ifndef __NX842_H__
 #define __NX842_H__
 
-int nx842_get_workmem_size(void);
-int nx842_get_workmem_size_aligned(void);
+#define __NX842_PSERIES_MEM_COMPRESS	(10240)
+#define __NX842_POWERNV_MEM_COMPRESS	(1024)
+
+#define NX842_MEM_COMPRESS	(max_t(unsigned int,			\
+	__NX842_PSERIES_MEM_COMPRESS, __NX842_POWERNV_MEM_COMPRESS))
+
+struct nx842_constraints {
+	int alignment;
+	int multiple;
+	int minimum;
+	int maximum;
+};
+
+int nx842_constraints(struct nx842_constraints *constraints);
+
 int nx842_compress(const unsigned char *in, unsigned int in_len,
-		unsigned char *out, unsigned int *out_len, void *wrkmem);
+		   unsigned char *out, unsigned int *out_len, void *wrkmem);
 int nx842_decompress(const unsigned char *in, unsigned int in_len,
-		unsigned char *out, unsigned int *out_len, void *wrkmem);
+		     unsigned char *out, unsigned int *out_len, void *wrkmem);
 
 #endif
diff --git a/include/linux/random.h b/include/linux/random.h
index b05856e..796267d 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -14,6 +14,7 @@ extern void add_input_randomness(unsigned int type, unsigned int code,
 extern void add_interrupt_randomness(int irq, int irq_flags);
 
 extern void get_random_bytes(void *buf, int nbytes);
+extern void get_blocking_random_bytes(void *buf, int nbytes);
 extern void get_random_bytes_arch(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 extern int random_int_secret_init(void);
diff --git a/include/linux/sw842.h b/include/linux/sw842.h
new file mode 100644
index 0000000..109ba04
--- /dev/null
+++ b/include/linux/sw842.h
@@ -0,0 +1,12 @@
+#ifndef __SW842_H__
+#define __SW842_H__
+
+#define SW842_MEM_COMPRESS	(0xf000)
+
+int sw842_compress(const u8 *src, unsigned int srclen,
+		   u8 *dst, unsigned int *destlen, void *wmem);
+
+int sw842_decompress(const u8 *src, unsigned int srclen,
+		     u8 *dst, unsigned int *destlen);
+
+#endif
diff --git a/lib/842/842.h b/lib/842/842.h
new file mode 100644
index 0000000..7c20003
--- /dev/null
+++ b/lib/842/842.h
@@ -0,0 +1,127 @@
+
+#ifndef __842_H__
+#define __842_H__
+
+/* The 842 compressed format is made up of multiple blocks, each of
+ * which have the format:
+ *
+ * <template>[arg1][arg2][arg3][arg4]
+ *
+ * where there are between 0 and 4 template args, depending on the specific
+ * template operation.  For normal operations, each arg is either a specific
+ * number of data bytes to add to the output buffer, or an index pointing
+ * to a previously-written number of data bytes to copy to the output buffer.
+ *
+ * The template code is a 5-bit value.  This code indicates what to do with
+ * the following data.  Template codes from 0 to 0x19 should use the template
+ * table, the static "decomp_ops" table used in decompress.  For each template
+ * (table row), there are between 1 and 4 actions; each action corresponds to
+ * an arg following the template code bits.  Each action is either a "data"
+ * type action, or a "index" type action, and each action results in 2, 4, or 8
+ * bytes being written to the output buffer.  Each template (i.e. all actions
+ * in the table row) will add up to 8 bytes being written to the output buffer.
+ * Any row with less than 4 actions is padded with noop actions, indicated by
+ * N0 (for which there is no corresponding arg in the compressed data buffer).
+ *
+ * "Data" actions, indicated in the table by D2, D4, and D8, mean that the
+ * corresponding arg is 2, 4, or 8 bytes, respectively, in the compressed data
+ * buffer should be copied directly to the output buffer.
+ *
+ * "Index" actions, indicated in the table by I2, I4, and I8, mean the
+ * corresponding arg is an index parameter that points to, respectively, a 2,
+ * 4, or 8 byte value already in the output buffer, that should be copied to
+ * the end of the output buffer.  Essentially, the index points to a position
+ * in a ring buffer that contains the last N bytes of output buffer data.
+ * The number of bits for each index's arg are: 8 bits for I2, 9 bits for I4,
+ * and 8 bits for I8.  Since each index points to a 2, 4, or 8 byte section,
+ * this means that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4
+ * can reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference 2048
+ * bytes ((2^8 = 256) * 8 bytes).  Think of it as a kind-of ring buffer for
+ * each of I2, I4, and I8 that are updated for each byte written to the output
+ * buffer.  In this implementation, the output buffer is directly used for each
+ * index; there is no additional memory required.  Note that the index is into
+ * a ring buffer, not a sliding window; for example, if there have been 260
+ * bytes written to the output buffer, an I2 index of 0 would index to byte 256
+ * in the output buffer, while an I2 index of 16 would index to byte 16 in the
+ * output buffer.
+ *
+ * There are also 3 special template codes; 0x1b for "repeat", 0x1c for
+ * "zeros", and 0x1e for "end".  The "repeat" operation is followed by a 6 bit
+ * arg N indicating how many times to repeat.  The last 8 bytes written to the
+ * output buffer are written again to the output buffer, N + 1 times.  The
+ * "zeros" operation, which has no arg bits, writes 8 zeros to the output
+ * buffer.  The "end" operation, which also has no arg bits, signals the end
+ * of the compressed data.  There may be some number of padding (don't care,
+ * but usually 0) bits after the "end" operation bits, to fill the buffer
+ * length to a specific byte multiple (usually a multiple of 8, 16, or 32
+ * bytes).
+ *
+ * This software implementation also uses one of the undefined template values,
+ * 0x1d as a special "short data" template code, to represent less than 8 bytes
+ * of uncompressed data.  It is followed by a 3 bit arg N indicating how many
+ * data bytes will follow, and then N bytes of data, which should be copied to
+ * the output buffer.  This allows the software 842 compressor to accept input
+ * buffers that are not an exact multiple of 8 bytes long.  However, those
+ * compressed buffers containing this sw-only template will be rejected by
+ * the 842 hardware decompressor, and must be decompressed with this software
+ * library.  The 842 software compression module includes a parameter to
+ * disable using this sw-only "short data" template, and instead simply
+ * reject any input buffer that is not a multiple of 8 bytes long.
+ *
+ * After all actions for each operation code are processed, another template
+ * code is in the next 5 bits.  The decompression ends once the "end" template
+ * code is detected.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <asm/unaligned.h>
+
+#include <linux/sw842.h>
+
+/* special templates */
+#define OP_REPEAT	(0x1B)
+#define OP_ZEROS	(0x1C)
+#define OP_END		(0x1E)
+
+/* sw only template - this is not in the hw design; it's used only by this
+ * software compressor and decompressor, to allow input buffers that aren't
+ * a multiple of 8.
+ */
+#define OP_SHORT_DATA	(0x1D)
+
+/* additional bits of each op param */
+#define OP_BITS		(5)
+#define REPEAT_BITS	(6)
+#define SHORT_DATA_BITS	(3)
+#define I2_BITS		(8)
+#define I4_BITS		(9)
+#define I8_BITS		(8)
+
+#define REPEAT_BITS_MAX		(0x3f)
+#define SHORT_DATA_BITS_MAX	(0x7)
+
+/* Arbitrary values used to indicate action */
+#define OP_ACTION	(0x70)
+#define OP_ACTION_INDEX	(0x10)
+#define OP_ACTION_DATA	(0x20)
+#define OP_ACTION_NOOP	(0x40)
+#define OP_AMOUNT	(0x0f)
+#define OP_AMOUNT_0	(0x00)
+#define OP_AMOUNT_2	(0x02)
+#define OP_AMOUNT_4	(0x04)
+#define OP_AMOUNT_8	(0x08)
+
+#define D2		(OP_ACTION_DATA  | OP_AMOUNT_2)
+#define D4		(OP_ACTION_DATA  | OP_AMOUNT_4)
+#define D8		(OP_ACTION_DATA  | OP_AMOUNT_8)
+#define I2		(OP_ACTION_INDEX | OP_AMOUNT_2)
+#define I4		(OP_ACTION_INDEX | OP_AMOUNT_4)
+#define I8		(OP_ACTION_INDEX | OP_AMOUNT_8)
+#define N0		(OP_ACTION_NOOP  | OP_AMOUNT_0)
+
+/* the max of the regular templates - not including the special templates */
+#define OPS_MAX		(0x1a)
+
+#endif
diff --git a/lib/842/842_compress.c b/lib/842/842_compress.c
new file mode 100644
index 0000000..7ce6894
--- /dev/null
+++ b/lib/842/842_compress.c
@@ -0,0 +1,626 @@
+/*
+ * 842 Software Compression
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * See 842.h for details of the 842 compressed format.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define MODULE_NAME "842_compress"
+
+#include <linux/hashtable.h>
+
+#include "842.h"
+#include "842_debugfs.h"
+
+#define SW842_HASHTABLE8_BITS	(10)
+#define SW842_HASHTABLE4_BITS	(11)
+#define SW842_HASHTABLE2_BITS	(10)
+
+/* By default, we allow compressing input buffers of any length, but we must
+ * use the non-standard "short data" template so the decompressor can correctly
+ * reproduce the uncompressed data buffer at the right length.  However the
+ * hardware 842 compressor will not recognize the "short data" template, and
+ * will fail to decompress any compressed buffer containing it (I have no idea
+ * why anyone would want to use software to compress and hardware to decompress
+ * but that's beside the point).  This parameter forces the compression
+ * function to simply reject any input buffer that isn't a multiple of 8 bytes
+ * long, instead of using the "short data" template, so that all compressed
+ * buffers produced by this function will be decompressable by the 842 hardware
+ * decompressor.  Unless you have a specific need for that, leave this disabled
+ * so that any length buffer can be compressed.
+ */
+static bool sw842_strict;
+module_param_named(strict, sw842_strict, bool, 0644);
+
+static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
+	{ I8, N0, N0, N0, 0x19 }, /* 8 */
+	{ I4, I4, N0, N0, 0x18 }, /* 18 */
+	{ I4, I2, I2, N0, 0x17 }, /* 25 */
+	{ I2, I2, I4, N0, 0x13 }, /* 25 */
+	{ I2, I2, I2, I2, 0x12 }, /* 32 */
+	{ I4, I2, D2, N0, 0x16 }, /* 33 */
+	{ I4, D2, I2, N0, 0x15 }, /* 33 */
+	{ I2, D2, I4, N0, 0x0e }, /* 33 */
+	{ D2, I2, I4, N0, 0x09 }, /* 33 */
+	{ I2, I2, I2, D2, 0x11 }, /* 40 */
+	{ I2, I2, D2, I2, 0x10 }, /* 40 */
+	{ I2, D2, I2, I2, 0x0d }, /* 40 */
+	{ D2, I2, I2, I2, 0x08 }, /* 40 */
+	{ I4, D4, N0, N0, 0x14 }, /* 41 */
+	{ D4, I4, N0, N0, 0x04 }, /* 41 */
+	{ I2, I2, D4, N0, 0x0f }, /* 48 */
+	{ I2, D2, I2, D2, 0x0c }, /* 48 */
+	{ I2, D4, I2, N0, 0x0b }, /* 48 */
+	{ D2, I2, I2, D2, 0x07 }, /* 48 */
+	{ D2, I2, D2, I2, 0x06 }, /* 48 */
+	{ D4, I2, I2, N0, 0x03 }, /* 48 */
+	{ I2, D2, D4, N0, 0x0a }, /* 56 */
+	{ D2, I2, D4, N0, 0x05 }, /* 56 */
+	{ D4, I2, D2, N0, 0x02 }, /* 56 */
+	{ D4, D2, I2, N0, 0x01 }, /* 56 */
+	{ D8, N0, N0, N0, 0x00 }, /* 64 */
+};
+
+struct sw842_hlist_node8 {
+	struct hlist_node node;
+	u64 data;
+	u8 index;
+};
+
+struct sw842_hlist_node4 {
+	struct hlist_node node;
+	u32 data;
+	u16 index;
+};
+
+struct sw842_hlist_node2 {
+	struct hlist_node node;
+	u16 data;
+	u8 index;
+};
+
+#define INDEX_NOT_FOUND		(-1)
+#define INDEX_NOT_CHECKED	(-2)
+
+struct sw842_param {
+	u8 *in;
+	u8 *instart;
+	u64 ilen;
+	u8 *out;
+	u64 olen;
+	u8 bit;
+	u64 data8[1];
+	u32 data4[2];
+	u16 data2[4];
+	int index8[1];
+	int index4[2];
+	int index2[4];
+	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
+	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
+	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
+	struct sw842_hlist_node8 node8[1 << I8_BITS];
+	struct sw842_hlist_node4 node4[1 << I4_BITS];
+	struct sw842_hlist_node2 node2[1 << I2_BITS];
+};
+
+#define get_input_data(p, o, b)						\
+	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
+
+#define init_hashtable_nodes(p, b)	do {			\
+	int _i;							\
+	hash_init((p)->htable##b);				\
+	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
+		(p)->node##b[_i].index = _i;			\
+		(p)->node##b[_i].data = 0;			\
+		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
+	}							\
+} while (0)
+
+#define find_index(p, b, n)	({					\
+	struct sw842_hlist_node##b *_n;					\
+	p->index##b[n] = INDEX_NOT_FOUND;				\
+	hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) {	\
+		if (p->data##b[n] == _n->data) {			\
+			p->index##b[n] = _n->index;			\
+			break;						\
+		}							\
+	}								\
+	p->index##b[n] >= 0;						\
+})
+
+#define check_index(p, b, n)			\
+	((p)->index##b[n] == INDEX_NOT_CHECKED	\
+	 ? find_index(p, b, n)			\
+	 : (p)->index##b[n] >= 0)
+
+#define replace_hash(p, b, i, d)	do {				\
+	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
+	hash_del(&_n->node);						\
+	_n->data = (p)->data##b[d];					\
+	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
+		 (unsigned int)_n->index,				\
+		 (unsigned int)((p)->in - (p)->instart),		\
+		 (unsigned long)_n->data);				\
+	hash_add((p)->htable##b, &_n->node, _n->data);			\
+} while (0)
+
+static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
+
+static int add_bits(struct sw842_param *p, u64 d, u8 n);
+
+static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
+{
+	int ret;
+
+	if (n <= s)
+		return -EINVAL;
+
+	ret = add_bits(p, d >> s, n - s);
+	if (ret)
+		return ret;
+	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
+}
+
+static int add_bits(struct sw842_param *p, u64 d, u8 n)
+{
+	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
+	u64 o;
+	u8 *out = p->out;
+
+	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
+
+	if (n > 64)
+		return -EINVAL;
+
+	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
+	 * or if we're at the end of the output buffer and would write past end
+	 */
+	if (bits > 64)
+		return __split_add_bits(p, d, n, 32);
+	else if (p->olen < 8 && bits > 32 && bits <= 56)
+		return __split_add_bits(p, d, n, 16);
+	else if (p->olen < 4 && bits > 16 && bits <= 24)
+		return __split_add_bits(p, d, n, 8);
+
+	if (DIV_ROUND_UP(bits, 8) > p->olen)
+		return -ENOSPC;
+
+	o = *out & bmask[b];
+	d <<= s;
+
+	if (bits <= 8)
+		*out = o | d;
+	else if (bits <= 16)
+		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
+	else if (bits <= 24)
+		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
+	else if (bits <= 32)
+		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
+	else if (bits <= 40)
+		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
+	else if (bits <= 48)
+		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
+	else if (bits <= 56)
+		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
+	else
+		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
+
+	p->bit += n;
+
+	if (p->bit > 7) {
+		p->out += p->bit / 8;
+		p->olen -= p->bit / 8;
+		p->bit %= 8;
+	}
+
+	return 0;
+}
+
+static int add_template(struct sw842_param *p, u8 c)
+{
+	int ret, i, b = 0;
+	u8 *t = comp_ops[c];
+	bool inv = false;
+
+	if (c >= OPS_MAX)
+		return -EINVAL;
+
+	pr_debug("template %x\n", t[4]);
+
+	ret = add_bits(p, t[4], OP_BITS);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < 4; i++) {
+		pr_debug("op %x\n", t[i]);
+
+		switch (t[i] & OP_AMOUNT) {
+		case OP_AMOUNT_8:
+			if (b)
+				inv = true;
+			else if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index8[0], I8_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data8[0], 64);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_4:
+			if (b == 2 && t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, get_input_data(p, 2, 32), 32);
+			else if (b != 0 && b != 4)
+				inv = true;
+			else if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data4[b >> 2], 32);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_2:
+			if (b != 0 && b != 2 && b != 4 && b != 6)
+				inv = true;
+			if (t[i] & OP_ACTION_INDEX)
+				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
+			else if (t[i] & OP_ACTION_DATA)
+				ret = add_bits(p, p->data2[b >> 1], 16);
+			else
+				inv = true;
+			break;
+		case OP_AMOUNT_0:
+			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
+			break;
+		default:
+			inv = true;
+			break;
+		}
+
+		if (ret)
+			return ret;
+
+		if (inv) {
+			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
+			       c, i, t[0], t[1], t[2], t[3]);
+			return -EINVAL;
+		}
+
+		b += t[i] & OP_AMOUNT;
+	}
+
+	if (b != 8) {
+		pr_err("Invalid template %x len %x : %x %x %x %x\n",
+		       c, b, t[0], t[1], t[2], t[3]);
+		return -EINVAL;
+	}
+
+	if (sw842_template_counts)
+		atomic_inc(&template_count[t[4]]);
+
+	return 0;
+}
+
+static int add_repeat_template(struct sw842_param *p, u8 r)
+{
+	int ret;
+
+	/* repeat param is 0-based */
+	if (!r || --r > REPEAT_BITS_MAX)
+		return -EINVAL;
+
+	ret = add_bits(p, OP_REPEAT, OP_BITS);
+	if (ret)
+		return ret;
+
+	ret = add_bits(p, r, REPEAT_BITS);
+	if (ret)
+		return ret;
+
+	if (sw842_template_counts)
+		atomic_inc(&template_repeat_count);
+
+	return 0;
+}
+
+static int add_short_data_template(struct sw842_param *p, u8 b)
+{
+	int ret, i;
+
+	if (!b || b > SHORT_DATA_BITS_MAX)
+		return -EINVAL;
+
+	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
+	if (ret)
+		return ret;
+
+	ret = add_bits(p, b, SHORT_DATA_BITS);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < b; i++) {
+		ret = add_bits(p, p->in[i], 8);
+		if (ret)
+			return ret;
+	}
+
+	if (sw842_template_counts)
+		atomic_inc(&template_short_data_count);
+
+	return 0;
+}
+
+static int add_zeros_template(struct sw842_param *p)
+{
+	int ret = add_bits(p, OP_ZEROS, OP_BITS);
+
+	if (ret)
+		return ret;
+
+	if (sw842_template_counts)
+		atomic_inc(&template_zeros_count);
+
+	return 0;
+}
+
+static int add_end_template(struct sw842_param *p)
+{
+	int ret = add_bits(p, OP_END, OP_BITS);
+
+	if (ret)
+		return ret;
+
+	if (sw842_template_counts)
+		atomic_inc(&template_end_count);
+
+	return 0;
+}
+
+static bool check_template(struct sw842_param *p, u8 c)
+{
+	u8 *t = comp_ops[c];
+	int i, match, b = 0;
+
+	if (c >= OPS_MAX)
+		return false;
+
+	for (i = 0; i < 4; i++) {
+		if (t[i] & OP_ACTION_INDEX) {
+			if (t[i] & OP_AMOUNT_2)
+				match = check_index(p, 2, b >> 1);
+			else if (t[i] & OP_AMOUNT_4)
+				match = check_index(p, 4, b >> 2);
+			else if (t[i] & OP_AMOUNT_8)
+				match = check_index(p, 8, 0);
+			else
+				return false;
+			if (!match)
+				return false;
+		}
+
+		b += t[i] & OP_AMOUNT;
+	}
+
+	return true;
+}
+
+static void get_next_data(struct sw842_param *p)
+{
+	p->data8[0] = get_input_data(p, 0, 64);
+	p->data4[0] = get_input_data(p, 0, 32);
+	p->data4[1] = get_input_data(p, 4, 32);
+	p->data2[0] = get_input_data(p, 0, 16);
+	p->data2[1] = get_input_data(p, 2, 16);
+	p->data2[2] = get_input_data(p, 4, 16);
+	p->data2[3] = get_input_data(p, 6, 16);
+}
+
+/* update the hashtable entries.
+ * only call this after finding/adding the current template
+ * the dataN fields for the current 8 byte block must be already updated
+ */
+static void update_hashtables(struct sw842_param *p)
+{
+	u64 pos = p->in - p->instart;
+	u64 n8 = (pos >> 3) % (1 << I8_BITS);
+	u64 n4 = (pos >> 2) % (1 << I4_BITS);
+	u64 n2 = (pos >> 1) % (1 << I2_BITS);
+
+	replace_hash(p, 8, n8, 0);
+	replace_hash(p, 4, n4, 0);
+	replace_hash(p, 4, n4, 1);
+	replace_hash(p, 2, n2, 0);
+	replace_hash(p, 2, n2, 1);
+	replace_hash(p, 2, n2, 2);
+	replace_hash(p, 2, n2, 3);
+}
+
+/* find the next template to use, and add it
+ * the p->dataN fields must already be set for the current 8 byte block
+ */
+static int process_next(struct sw842_param *p)
+{
+	int ret, i;
+
+	p->index8[0] = INDEX_NOT_CHECKED;
+	p->index4[0] = INDEX_NOT_CHECKED;
+	p->index4[1] = INDEX_NOT_CHECKED;
+	p->index2[0] = INDEX_NOT_CHECKED;
+	p->index2[1] = INDEX_NOT_CHECKED;
+	p->index2[2] = INDEX_NOT_CHECKED;
+	p->index2[3] = INDEX_NOT_CHECKED;
+
+	/* check up to OPS_MAX - 1; last op is our fallback */
+	for (i = 0; i < OPS_MAX - 1; i++) {
+		if (check_template(p, i))
+			break;
+	}
+
+	ret = add_template(p, i);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * sw842_compress
+ *
+ * Compress the uncompressed buffer of length @ilen at @in to the output buffer
+ * @out, using no more than @olen bytes, using the 842 compression format.
+ *
+ * Returns: 0 on success, error on failure.  The @olen parameter
+ * will contain the number of output bytes written on success, or
+ * 0 on error.
+ */
+int sw842_compress(const u8 *in, unsigned int ilen,
+		   u8 *out, unsigned int *olen, void *wmem)
+{
+	struct sw842_param *p = (struct sw842_param *)wmem;
+	int ret;
+	u64 last, next, pad, total;
+	u8 repeat_count = 0;
+
+	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
+
+	init_hashtable_nodes(p, 8);
+	init_hashtable_nodes(p, 4);
+	init_hashtable_nodes(p, 2);
+
+	p->in = (u8 *)in;
+	p->instart = p->in;
+	p->ilen = ilen;
+	p->out = out;
+	p->olen = *olen;
+	p->bit = 0;
+
+	total = p->olen;
+
+	*olen = 0;
+
+	/* if using strict mode, we can only compress a multiple of 8 */
+	if (sw842_strict && (ilen % 8)) {
+		pr_err("Using strict mode, can't compress len %d\n", ilen);
+		return -EINVAL;
+	}
+
+	/* let's compress at least 8 bytes, mkay? */
+	if (unlikely(ilen < 8))
+		goto skip_comp;
+
+	/* make initial 'last' different so we don't match the first time */
+	last = ~get_unaligned((u64 *)p->in);
+
+	while (p->ilen > 7) {
+		next = get_unaligned((u64 *)p->in);
+
+		/* must get the next data, as we need to update the hashtable
+		 * entries with the new data every time
+		 */
+		get_next_data(p);
+
+		/* we don't care about endianness in last or next;
+		 * we're just comparing 8 bytes to another 8 bytes,
+		 * they're both the same endianness
+		 */
+		if (next == last) {
+			/* repeat count bits are 0-based, so we stop at +1 */
+			if (++repeat_count <= REPEAT_BITS_MAX)
+				goto repeat;
+		}
+		if (repeat_count) {
+			ret = add_repeat_template(p, repeat_count);
+			repeat_count = 0;
+			if (next == last) /* reached max repeat bits */
+				goto repeat;
+		}
+
+		if (next == 0)
+			ret = add_zeros_template(p);
+		else
+			ret = process_next(p);
+
+		if (ret)
+			return ret;
+
+repeat:
+		last = next;
+		update_hashtables(p);
+		p->in += 8;
+		p->ilen -= 8;
+	}
+
+	if (repeat_count) {
+		ret = add_repeat_template(p, repeat_count);
+		if (ret)
+			return ret;
+	}
+
+skip_comp:
+	if (p->ilen > 0) {
+		ret = add_short_data_template(p, p->ilen);
+		if (ret)
+			return ret;
+
+		p->in += p->ilen;
+		p->ilen = 0;
+	}
+
+	ret = add_end_template(p);
+	if (ret)
+		return ret;
+
+	if (p->bit) {
+		p->out++;
+		p->olen--;
+		p->bit = 0;
+	}
+
+	/* pad compressed length to multiple of 8 */
+	pad = (8 - ((total - p->olen) % 8)) % 8;
+	if (pad) {
+		if (pad > p->olen) /* we were so close! */
+			return -ENOSPC;
+		memset(p->out, 0, pad);
+		p->out += pad;
+		p->olen -= pad;
+	}
+
+	if (unlikely((total - p->olen) > UINT_MAX))
+		return -ENOSPC;
+
+	*olen = total - p->olen;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sw842_compress);
+
+static int __init sw842_init(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_create();
+
+	return 0;
+}
+module_init(sw842_init);
+
+static void __exit sw842_exit(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_remove();
+}
+module_exit(sw842_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software 842 Compressor");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/lib/842/842_debugfs.h b/lib/842/842_debugfs.h
new file mode 100644
index 0000000..e7f3bff
--- /dev/null
+++ b/lib/842/842_debugfs.h
@@ -0,0 +1,52 @@
+
+#ifndef __842_DEBUGFS_H__
+#define __842_DEBUGFS_H__
+
+#include <linux/debugfs.h>
+
+static bool sw842_template_counts;
+module_param_named(template_counts, sw842_template_counts, bool, 0444);
+
+static atomic_t template_count[OPS_MAX], template_repeat_count,
+	template_zeros_count, template_short_data_count, template_end_count;
+
+static struct dentry *sw842_debugfs_root;
+
+static int __init sw842_debugfs_create(void)
+{
+	umode_t m = S_IRUGO | S_IWUSR;
+	int i;
+
+	if (!debugfs_initialized())
+		return -ENODEV;
+
+	sw842_debugfs_root = debugfs_create_dir(MODULE_NAME, NULL);
+	if (IS_ERR(sw842_debugfs_root))
+		return PTR_ERR(sw842_debugfs_root);
+
+	for (i = 0; i < ARRAY_SIZE(template_count); i++) {
+		char name[32];
+
+		snprintf(name, 32, "template_%02x", i);
+		debugfs_create_atomic_t(name, m, sw842_debugfs_root,
+					&template_count[i]);
+	}
+	debugfs_create_atomic_t("template_repeat", m, sw842_debugfs_root,
+				&template_repeat_count);
+	debugfs_create_atomic_t("template_zeros", m, sw842_debugfs_root,
+				&template_zeros_count);
+	debugfs_create_atomic_t("template_short_data", m, sw842_debugfs_root,
+				&template_short_data_count);
+	debugfs_create_atomic_t("template_end", m, sw842_debugfs_root,
+				&template_end_count);
+
+	return 0;
+}
+
+static void __exit sw842_debugfs_remove(void)
+{
+	if (sw842_debugfs_root && !IS_ERR(sw842_debugfs_root))
+		debugfs_remove_recursive(sw842_debugfs_root);
+}
+
+#endif
diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c
new file mode 100644
index 0000000..5446ff0
--- /dev/null
+++ b/lib/842/842_decompress.c
@@ -0,0 +1,405 @@
+/*
+ * 842 Software Decompression
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * See 842.h for details of the 842 compressed format.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define MODULE_NAME "842_decompress"
+
+#include "842.h"
+#include "842_debugfs.h"
+
+/* rolling fifo sizes */
+#define I2_FIFO_SIZE	(2 * (1 << I2_BITS))
+#define I4_FIFO_SIZE	(4 * (1 << I4_BITS))
+#define I8_FIFO_SIZE	(8 * (1 << I8_BITS))
+
+static u8 decomp_ops[OPS_MAX][4] = {
+	{ D8, N0, N0, N0 },
+	{ D4, D2, I2, N0 },
+	{ D4, I2, D2, N0 },
+	{ D4, I2, I2, N0 },
+	{ D4, I4, N0, N0 },
+	{ D2, I2, D4, N0 },
+	{ D2, I2, D2, I2 },
+	{ D2, I2, I2, D2 },
+	{ D2, I2, I2, I2 },
+	{ D2, I2, I4, N0 },
+	{ I2, D2, D4, N0 },
+	{ I2, D4, I2, N0 },
+	{ I2, D2, I2, D2 },
+	{ I2, D2, I2, I2 },
+	{ I2, D2, I4, N0 },
+	{ I2, I2, D4, N0 },
+	{ I2, I2, D2, I2 },
+	{ I2, I2, I2, D2 },
+	{ I2, I2, I2, I2 },
+	{ I2, I2, I4, N0 },
+	{ I4, D4, N0, N0 },
+	{ I4, D2, I2, N0 },
+	{ I4, I2, D2, N0 },
+	{ I4, I2, I2, N0 },
+	{ I4, I4, N0, N0 },
+	{ I8, N0, N0, N0 }
+};
+
+struct sw842_param {
+	u8 *in;
+	u8 bit;
+	u64 ilen;
+	u8 *out;
+	u8 *ostart;
+	u64 olen;
+};
+
+#define beN_to_cpu(d, s)					\
+	((s) == 2 ? be16_to_cpu(get_unaligned((__be16 *)d)) :	\
+	 (s) == 4 ? be32_to_cpu(get_unaligned((__be32 *)d)) :	\
+	 (s) == 8 ? be64_to_cpu(get_unaligned((__be64 *)d)) :	\
+	 WARN(1, "pr_debug param err invalid size %x\n", s))
+
+static int next_bits(struct sw842_param *p, u64 *d, u8 n);
+
+static int __split_next_bits(struct sw842_param *p, u64 *d, u8 n, u8 s)
+{
+	u64 tmp = 0;
+	int ret;
+
+	if (n <= s) {
+		pr_debug("split_next_bits invalid n %u s %u\n", n, s);
+		return -EINVAL;
+	}
+
+	ret = next_bits(p, &tmp, n - s);
+	if (ret)
+		return ret;
+	ret = next_bits(p, d, s);
+	if (ret)
+		return ret;
+	*d |= tmp << s;
+	return 0;
+}
+
+static int next_bits(struct sw842_param *p, u64 *d, u8 n)
+{
+	u8 *in = p->in, b = p->bit, bits = b + n;
+
+	if (n > 64) {
+		pr_debug("next_bits invalid n %u\n", n);
+		return -EINVAL;
+	}
+
+	/* split this up if reading > 8 bytes, or if we're at the end of
+	 * the input buffer and would read past the end
+	 */
+	if (bits > 64)
+		return __split_next_bits(p, d, n, 32);
+	else if (p->ilen < 8 && bits > 32 && bits <= 56)
+		return __split_next_bits(p, d, n, 16);
+	else if (p->ilen < 4 && bits > 16 && bits <= 24)
+		return __split_next_bits(p, d, n, 8);
+
+	if (DIV_ROUND_UP(bits, 8) > p->ilen)
+		return -EOVERFLOW;
+
+	if (bits <= 8)
+		*d = *in >> (8 - bits);
+	else if (bits <= 16)
+		*d = be16_to_cpu(get_unaligned((__be16 *)in)) >> (16 - bits);
+	else if (bits <= 32)
+		*d = be32_to_cpu(get_unaligned((__be32 *)in)) >> (32 - bits);
+	else
+		*d = be64_to_cpu(get_unaligned((__be64 *)in)) >> (64 - bits);
+
+	*d &= GENMASK_ULL(n - 1, 0);
+
+	p->bit += n;
+
+	if (p->bit > 7) {
+		p->in += p->bit / 8;
+		p->ilen -= p->bit / 8;
+		p->bit %= 8;
+	}
+
+	return 0;
+}
+
+static int do_data(struct sw842_param *p, u8 n)
+{
+	u64 v;
+	int ret;
+
+	if (n > p->olen)
+		return -ENOSPC;
+
+	ret = next_bits(p, &v, n * 8);
+	if (ret)
+		return ret;
+
+	switch (n) {
+	case 2:
+		put_unaligned(cpu_to_be16((u16)v), (__be16 *)p->out);
+		break;
+	case 4:
+		put_unaligned(cpu_to_be32((u32)v), (__be32 *)p->out);
+		break;
+	case 8:
+		put_unaligned(cpu_to_be64((u64)v), (__be64 *)p->out);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	p->out += n;
+	p->olen -= n;
+
+	return 0;
+}
+
+static int __do_index(struct sw842_param *p, u8 size, u8 bits, u64 fsize)
+{
+	u64 index, offset, total = round_down(p->out - p->ostart, 8);
+	int ret;
+
+	ret = next_bits(p, &index, bits);
+	if (ret)
+		return ret;
+
+	offset = index * size;
+
+	/* a ring buffer of fsize is used; correct the offset */
+	if (total > fsize) {
+		/* this is where the current fifo is */
+		u64 section = round_down(total, fsize);
+		/* the current pos in the fifo */
+		u64 pos = total - section;
+
+		/* if the offset is past/at the pos, we need to
+		 * go back to the last fifo section
+		 */
+		if (offset >= pos)
+			section -= fsize;
+
+		offset += section;
+	}
+
+	if (offset + size > total) {
+		pr_debug("index%x %lx points past end %lx\n", size,
+			 (unsigned long)offset, (unsigned long)total);
+		return -EINVAL;
+	}
+
+	pr_debug("index%x to %lx off %lx adjoff %lx tot %lx data %lx\n",
+		 size, (unsigned long)index, (unsigned long)(index * size),
+		 (unsigned long)offset, (unsigned long)total,
+		 (unsigned long)beN_to_cpu(&p->ostart[offset], size));
+
+	memcpy(p->out, &p->ostart[offset], size);
+	p->out += size;
+	p->olen -= size;
+
+	return 0;
+}
+
+static int do_index(struct sw842_param *p, u8 n)
+{
+	switch (n) {
+	case 2:
+		return __do_index(p, 2, I2_BITS, I2_FIFO_SIZE);
+	case 4:
+		return __do_index(p, 4, I4_BITS, I4_FIFO_SIZE);
+	case 8:
+		return __do_index(p, 8, I8_BITS, I8_FIFO_SIZE);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int do_op(struct sw842_param *p, u8 o)
+{
+	int i, ret = 0;
+
+	if (o >= OPS_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < 4; i++) {
+		u8 op = decomp_ops[o][i];
+
+		pr_debug("op is %x\n", op);
+
+		switch (op & OP_ACTION) {
+		case OP_ACTION_DATA:
+			ret = do_data(p, op & OP_AMOUNT);
+			break;
+		case OP_ACTION_INDEX:
+			ret = do_index(p, op & OP_AMOUNT);
+			break;
+		case OP_ACTION_NOOP:
+			break;
+		default:
+			pr_err("Interal error, invalid op %x\n", op);
+			return -EINVAL;
+		}
+
+		if (ret)
+			return ret;
+	}
+
+	if (sw842_template_counts)
+		atomic_inc(&template_count[o]);
+
+	return 0;
+}
+
+/**
+ * sw842_decompress
+ *
+ * Decompress the 842-compressed buffer of length @ilen at @in
+ * to the output buffer @out, using no more than @olen bytes.
+ *
+ * The compressed buffer must be only a single 842-compressed buffer,
+ * with the standard format described in the comments in 842.h
+ * Processing will stop when the 842 "END" template is detected,
+ * not the end of the buffer.
+ *
+ * Returns: 0 on success, error on failure.  The @olen parameter
+ * will contain the number of output bytes written on success, or
+ * 0 on error.
+ */
+int sw842_decompress(const u8 *in, unsigned int ilen,
+		     u8 *out, unsigned int *olen)
+{
+	struct sw842_param p;
+	int ret;
+	u64 op, rep, tmp, bytes, total;
+
+	p.in = (u8 *)in;
+	p.bit = 0;
+	p.ilen = ilen;
+	p.out = out;
+	p.ostart = out;
+	p.olen = *olen;
+
+	total = p.olen;
+
+	*olen = 0;
+
+	do {
+		ret = next_bits(&p, &op, OP_BITS);
+		if (ret)
+			return ret;
+
+		pr_debug("template is %lx\n", (unsigned long)op);
+
+		switch (op) {
+		case OP_REPEAT:
+			ret = next_bits(&p, &rep, REPEAT_BITS);
+			if (ret)
+				return ret;
+
+			if (p.out == out) /* no previous bytes */
+				return -EINVAL;
+
+			/* copy rep + 1 */
+			rep++;
+
+			if (rep * 8 > p.olen)
+				return -ENOSPC;
+
+			while (rep-- > 0) {
+				memcpy(p.out, p.out - 8, 8);
+				p.out += 8;
+				p.olen -= 8;
+			}
+
+			if (sw842_template_counts)
+				atomic_inc(&template_repeat_count);
+
+			break;
+		case OP_ZEROS:
+			if (8 > p.olen)
+				return -ENOSPC;
+
+			memset(p.out, 0, 8);
+			p.out += 8;
+			p.olen -= 8;
+
+			if (sw842_template_counts)
+				atomic_inc(&template_zeros_count);
+
+			break;
+		case OP_SHORT_DATA:
+			ret = next_bits(&p, &bytes, SHORT_DATA_BITS);
+			if (ret)
+				return ret;
+
+			if (!bytes || bytes > SHORT_DATA_BITS_MAX)
+				return -EINVAL;
+
+			while (bytes-- > 0) {
+				ret = next_bits(&p, &tmp, 8);
+				if (ret)
+					return ret;
+				*p.out = (u8)tmp;
+				p.out++;
+				p.olen--;
+			}
+
+			if (sw842_template_counts)
+				atomic_inc(&template_short_data_count);
+
+			break;
+		case OP_END:
+			if (sw842_template_counts)
+				atomic_inc(&template_end_count);
+
+			break;
+		default: /* use template */
+			ret = do_op(&p, op);
+			if (ret)
+				return ret;
+			break;
+		}
+	} while (op != OP_END);
+
+	if (unlikely((total - p.olen) > UINT_MAX))
+		return -ENOSPC;
+
+	*olen = total - p.olen;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sw842_decompress);
+
+static int __init sw842_init(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_create();
+
+	return 0;
+}
+module_init(sw842_init);
+
+static void __exit sw842_exit(void)
+{
+	if (sw842_template_counts)
+		sw842_debugfs_remove();
+}
+module_exit(sw842_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software 842 Decompressor");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/lib/842/Makefile b/lib/842/Makefile
new file mode 100644
index 0000000..5d24c0b
--- /dev/null
+++ b/lib/842/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_842_COMPRESS) += 842_compress.o
+obj-$(CONFIG_842_DECOMPRESS) += 842_decompress.o
diff --git a/lib/Kconfig b/lib/Kconfig
index 87da53b..42c925e 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -213,6 +213,12 @@ config RANDOM32_SELFTEST
 #
 # compression support is select'ed if needed
 #
+config 842_COMPRESS
+	tristate
+
+config 842_DECOMPRESS
+	tristate
+
 config ZLIB_INFLATE
 	tristate
 
diff --git a/lib/Makefile b/lib/Makefile
index 58f74d2..a1d67e4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -78,6 +78,8 @@ obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_CRC8)	+= crc8.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
+obj-$(CONFIG_842_COMPRESS) += 842/
+obj-$(CONFIG_842_DECOMPRESS) += 842/
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 208df7c..70d53da 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -11,9 +11,8 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/aes.h>
+#include <crypto/aead.h>
 
 #include <net/mac80211.h>
 #include "key.h"
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
index fd278bb..b91c9d7 100644
--- a/net/mac80211/aes_gcm.c
+++ b/net/mac80211/aes_gcm.c
@@ -8,9 +8,8 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/err.h>
-#include <crypto/aes.h>
+#include <crypto/aead.h>
 
 #include <net/mac80211.h>
 #include "key.h"
diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c
index f1321b7..c34b06c 100644
--- a/net/mac80211/aes_gmac.c
+++ b/net/mac80211/aes_gmac.c
@@ -9,8 +9,8 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <linux/err.h>
+#include <crypto/aead.h>
 #include <crypto/aes.h>
 
 #include <net/mac80211.h>
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index dcf7395..3ccf1e9 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -17,8 +17,9 @@
 #include <linux/err.h>
 #include <linux/bug.h>
 #include <linux/completion.h>
+#include <linux/crypto.h>
 #include <linux/ieee802154.h>
-#include <crypto/algapi.h>
+#include <crypto/aead.h>
 
 #include "ieee802154_i.h"
 #include "llsec.h"