summaryrefslogtreecommitdiffstats
path: root/secure/lib/libcrypto/i386
diff options
context:
space:
mode:
authorjkim <jkim@FreeBSD.org>2015-10-30 20:51:33 +0000
committerjkim <jkim@FreeBSD.org>2015-10-30 20:51:33 +0000
commit6b741bee156148072e0e9588e7c9f4a9d66d1ab9 (patch)
treee8d8b5ada49f5cdbf70d1e455c13f2625fdcdd45 /secure/lib/libcrypto/i386
parent979d5cd34dadfb0b78c606ecca3ec8d3a6ca245f (diff)
parent64cb0c902e312216cdc4c826fc0be9ba9e1bf4da (diff)
downloadFreeBSD-src-6b741bee156148072e0e9588e7c9f4a9d66d1ab9.zip
FreeBSD-src-6b741bee156148072e0e9588e7c9f4a9d66d1ab9.tar.gz
Merge OpenSSL 1.0.2d.
Diffstat (limited to 'secure/lib/libcrypto/i386')
-rw-r--r--secure/lib/libcrypto/i386/aes-586.s480
-rw-r--r--secure/lib/libcrypto/i386/aesni-x86.s1159
-rw-r--r--secure/lib/libcrypto/i386/bn-586.s2
-rw-r--r--secure/lib/libcrypto/i386/cast-586.s934
-rw-r--r--secure/lib/libcrypto/i386/des-586.s5
-rw-r--r--secure/lib/libcrypto/i386/ghash-x86.s226
-rw-r--r--secure/lib/libcrypto/i386/rc4-586.s4
-rw-r--r--secure/lib/libcrypto/i386/sha1-586.s1335
-rw-r--r--secure/lib/libcrypto/i386/sha256-586.s4551
-rw-r--r--secure/lib/libcrypto/i386/sha512-586.s2246
-rw-r--r--secure/lib/libcrypto/i386/vpaes-x86.s88
-rw-r--r--secure/lib/libcrypto/i386/wp-mmx.s322
-rw-r--r--secure/lib/libcrypto/i386/x86-gf2m.s2
-rw-r--r--secure/lib/libcrypto/i386/x86-mont.s2
-rw-r--r--secure/lib/libcrypto/i386/x86cpuid.s117
15 files changed, 8675 insertions, 2798 deletions
diff --git a/secure/lib/libcrypto/i386/aes-586.s b/secure/lib/libcrypto/i386/aes-586.s
index 704c53c..bb66276 100644
--- a/secure/lib/libcrypto/i386/aes-586.s
+++ b/secure/lib/libcrypto/i386/aes-586.s
@@ -101,74 +101,78 @@ _x86_AES_encrypt_compact:
xorl %ecx,%edx
movl %esi,%ecx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
leal (%ecx,%ecx,1),%edi
- subl %ebp,%esi
+ movl %ebp,%esi
+ shrl $7,%ebp
andl $4278124286,%edi
- andl $454761243,%esi
+ subl %ebp,%esi
movl %ecx,%ebp
+ andl $454761243,%esi
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ecx,%edi
xorl %esi,%ecx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ecx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%ecx
- rorl $16,%ebp
- xorl %ebp,%ecx
- rorl $8,%ebp
- xorl %ebp,%ecx
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ andl %edx,%ebp
leal (%edx,%edx,1),%edi
- subl %ebp,%esi
+ movl %ebp,%esi
+ shrl $7,%ebp
andl $4278124286,%edi
- andl $454761243,%esi
+ subl %ebp,%esi
movl %edx,%ebp
+ andl $454761243,%esi
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %edx,%edi
xorl %esi,%edx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%edx
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%edx
- rorl $16,%ebp
- xorl %ebp,%edx
- rorl $8,%ebp
- xorl %ebp,%edx
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ andl %eax,%ebp
leal (%eax,%eax,1),%edi
- subl %ebp,%esi
+ movl %ebp,%esi
+ shrl $7,%ebp
andl $4278124286,%edi
- andl $454761243,%esi
+ subl %ebp,%esi
movl %eax,%ebp
+ andl $454761243,%esi
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %eax,%edi
xorl %esi,%eax
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%eax
+ xorl %edi,%esi
+ movl $2155905152,%ebp
xorl %esi,%eax
- rorl $16,%ebp
- xorl %ebp,%eax
- rorl $8,%ebp
- xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ andl %ebx,%ebp
leal (%ebx,%ebx,1),%edi
- subl %ebp,%esi
+ movl %ebp,%esi
+ shrl $7,%ebp
andl $4278124286,%edi
- andl $454761243,%esi
+ subl %ebp,%esi
movl %ebx,%ebp
+ andl $454761243,%esi
+ rorl $16,%ebp
xorl %edi,%esi
+ movl %ebx,%edi
xorl %esi,%ebx
+ rorl $24,%edi
+ xorl %ebp,%esi
roll $24,%ebx
+ xorl %edi,%esi
xorl %esi,%ebx
- rorl $16,%ebp
- xorl %ebp,%ebx
- rorl $8,%ebp
- xorl %ebp,%ebx
movl 20(%esp),%edi
movl 28(%esp),%ebp
addl $16,%edi
@@ -290,74 +294,76 @@ _sse_AES_encrypt_compact:
pshufw $13,%mm4,%mm5
movd %mm1,%eax
movd %mm5,%ebx
+ movl %edi,20(%esp)
movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%ecx
- pshufw $13,%mm0,%mm2
movzbl %ah,%edx
+ pshufw $13,%mm0,%mm2
+ movzbl -128(%ebp,%esi,1),%ecx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
- shll $8,%edx
shrl $16,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ shll $8,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
- orl %esi,%ecx
pshufw $8,%mm4,%mm6
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
- orl %esi,%edx
shrl $16,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ orl %esi,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $8,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $24,%esi
orl %esi,%ecx
- movd %ecx,%mm0
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
movd %mm2,%eax
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
- orl %esi,%ecx
+ movd %ecx,%mm0
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
movd %mm6,%ebx
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
shll $8,%esi
- orl %esi,%ecx
- movd %ecx,%mm1
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%ecx
shrl $16,%ebx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
- shll $16,%esi
orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shrl $16,%eax
+ movd %ecx,%mm1
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %ah,%edi
+ shll $16,%ecx
+ andl $255,%eax
+ orl %esi,%ecx
punpckldq %mm1,%mm0
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
- orl %esi,%ecx
- andl $255,%eax
+ andl $255,%ebx
movzbl -128(%ebp,%eax,1),%eax
+ orl %esi,%ecx
shll $16,%eax
+ movzbl -128(%ebp,%edi,1),%esi
orl %eax,%edx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $8,%esi
- orl %esi,%ecx
- movd %ecx,%mm4
- andl $255,%ebx
movzbl -128(%ebp,%ebx,1),%ebx
+ orl %esi,%ecx
orl %ebx,%edx
+ movl 20(%esp),%edi
+ movd %ecx,%mm4
movd %edx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
@@ -1130,28 +1136,28 @@ _x86_AES_decrypt_compact:
movzbl -128(%ebp,%eax,1),%eax
shll $24,%eax
xorl %eax,%edx
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%eax
subl %edi,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%eax
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ebx
subl %edi,%esi
andl $4278124286,%ebx
andl $454761243,%esi
xorl %ecx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ebp
subl %edi,%esi
@@ -1162,39 +1168,39 @@ _x86_AES_decrypt_compact:
xorl %esi,%ebp
xorl %eax,%ecx
xorl %ebp,%eax
- roll $24,%eax
xorl %ebx,%ecx
xorl %ebp,%ebx
- roll $16,%ebx
+ roll $24,%eax
xorl %ebp,%ecx
- roll $8,%ebp
+ roll $16,%ebx
xorl %eax,%ecx
+ roll $8,%ebp
xorl %ebx,%ecx
movl 4(%esp),%eax
xorl %ebp,%ecx
movl %ecx,12(%esp)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebx
subl %edi,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ebx
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
xorl %edx,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%ebp
subl %edi,%esi
@@ -1205,39 +1211,39 @@ _x86_AES_decrypt_compact:
xorl %esi,%ebp
xorl %ebx,%edx
xorl %ebp,%ebx
- roll $24,%ebx
xorl %ecx,%edx
xorl %ebp,%ecx
- roll $16,%ecx
+ roll $24,%ebx
xorl %ebp,%edx
- roll $8,%ebp
+ roll $16,%ecx
xorl %ebx,%edx
+ roll $8,%ebp
xorl %ecx,%edx
movl 8(%esp),%ebx
xorl %ebp,%edx
movl %edx,16(%esp)
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %eax,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%eax,%eax,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
andl $4278124286,%edx
andl $454761243,%esi
xorl %eax,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -1248,37 +1254,37 @@ _x86_AES_decrypt_compact:
xorl %esi,%ebp
xorl %ecx,%eax
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%eax
xorl %ebp,%edx
- roll $16,%edx
+ roll $24,%ecx
xorl %ebp,%eax
- roll $8,%ebp
+ roll $16,%edx
xorl %ecx,%eax
+ roll $8,%ebp
xorl %edx,%eax
xorl %ebp,%eax
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%edi
+ movl $2155905152,%edi
+ andl %ebx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ebx,%ebx,1),%ecx
subl %edi,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%ecx
+ movl $2155905152,%edi
+ andl %ecx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%ecx,%ecx,1),%edx
subl %edi,%esi
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%edi
+ xorl %esi,%edx
+ movl $2155905152,%edi
+ andl %edx,%edi
+ movl %edi,%esi
shrl $7,%edi
leal (%edx,%edx,1),%ebp
subl %edi,%esi
@@ -1289,13 +1295,13 @@ _x86_AES_decrypt_compact:
xorl %esi,%ebp
xorl %ecx,%ebx
xorl %ebp,%ecx
- roll $24,%ecx
xorl %edx,%ebx
xorl %ebp,%edx
- roll $16,%edx
+ roll $24,%ecx
xorl %ebp,%ebx
- roll $8,%ebp
+ roll $16,%edx
xorl %ecx,%ebx
+ roll $8,%ebp
xorl %edx,%ebx
movl 12(%esp),%ecx
xorl %ebp,%ebx
@@ -1414,77 +1420,79 @@ _sse_AES_decrypt_compact:
.align 16
.L007loop:
pshufw $12,%mm0,%mm1
- movd %mm1,%eax
pshufw $9,%mm4,%mm5
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%ecx
+ movd %mm1,%eax
movd %mm5,%ebx
+ movl %edi,20(%esp)
+ movzbl %al,%esi
movzbl %ah,%edx
+ pshufw $6,%mm0,%mm2
+ movzbl -128(%ebp,%esi,1),%ecx
+ movzbl %bl,%edi
movzbl -128(%ebp,%edx,1),%edx
+ shrl $16,%eax
shll $8,%edx
- pshufw $6,%mm0,%mm2
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $16,%esi
+ pshufw $3,%mm4,%mm6
orl %esi,%ecx
- shrl $16,%eax
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $24,%esi
- orl %esi,%edx
shrl $16,%ebx
- pshufw $3,%mm4,%mm6
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ orl %esi,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shll $24,%esi
orl %esi,%ecx
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
shll $8,%esi
- orl %esi,%ecx
- movd %ecx,%mm0
- movzbl %al,%esi
movd %mm2,%eax
- movzbl -128(%ebp,%esi,1),%ecx
- shll $16,%ecx
- movzbl %bl,%esi
+ orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
+ shll $16,%esi
movd %mm6,%ebx
- movzbl -128(%ebp,%esi,1),%esi
+ movd %ecx,%mm0
+ movzbl -128(%ebp,%edi,1),%ecx
+ movzbl %al,%edi
orl %esi,%ecx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bl,%edi
orl %esi,%edx
- movzbl %bl,%esi
- movzbl -128(%ebp,%esi,1),%esi
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %ah,%edi
shll $16,%esi
- orl %esi,%edx
- movd %edx,%mm1
- movzbl %ah,%esi
- movzbl -128(%ebp,%esi,1),%edx
- shll $8,%edx
- movzbl %bh,%esi
shrl $16,%eax
- movzbl -128(%ebp,%esi,1),%esi
- shll $24,%esi
orl %esi,%edx
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %bh,%edi
shrl $16,%ebx
- punpckldq %mm1,%mm0
- movzbl %bh,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $8,%esi
- orl %esi,%ecx
+ movd %edx,%mm1
+ movzbl -128(%ebp,%edi,1),%edx
+ movzbl %bh,%edi
+ shll $24,%edx
andl $255,%ebx
+ orl %esi,%edx
+ punpckldq %mm1,%mm0
+ movzbl -128(%ebp,%edi,1),%esi
+ movzbl %al,%edi
+ shll $8,%esi
+ movzbl %ah,%eax
movzbl -128(%ebp,%ebx,1),%ebx
+ orl %esi,%ecx
+ movzbl -128(%ebp,%edi,1),%esi
orl %ebx,%edx
- movzbl %al,%esi
- movzbl -128(%ebp,%esi,1),%esi
shll $16,%esi
- orl %esi,%edx
- movd %edx,%mm4
- movzbl %ah,%eax
movzbl -128(%ebp,%eax,1),%eax
+ orl %esi,%edx
shll $24,%eax
orl %eax,%ecx
+ movl 20(%esp),%edi
+ movd %edx,%mm4
movd %ecx,%mm5
punpckldq %mm5,%mm4
addl $16,%edi
@@ -3046,30 +3054,30 @@ private_AES_set_decrypt_key:
.align 4
.L056permute:
addl $16,%edi
- movl %eax,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ movl $2155905152,%ebp
+ andl %eax,%ebp
leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
xorl %eax,%ebx
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
+ shrl $7,%ebp
xorl %eax,%ecx
subl %ebp,%esi
andl $4278124286,%edx
@@ -3090,30 +3098,30 @@ private_AES_set_decrypt_key:
movl %ebp,%ebx
xorl %edx,%eax
movl %eax,(%edi)
- movl %ebx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%ecx
andl $454761243,%esi
- xorl %ecx,%esi
- movl %esi,%ecx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%ecx
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
xorl %ebx,%ecx
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
+ shrl $7,%ebp
xorl %ebx,%edx
subl %ebp,%esi
andl $4278124286,%eax
@@ -3134,30 +3142,30 @@ private_AES_set_decrypt_key:
movl %ebp,%ecx
xorl %eax,%ebx
movl %ebx,4(%edi)
- movl %ecx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ movl $2155905152,%ebp
+ andl %ecx,%ebp
leal (%ecx,%ecx,1),%edx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%edx
andl $454761243,%esi
- xorl %edx,%esi
- movl %esi,%edx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%edx
+ movl $2155905152,%ebp
+ andl %edx,%ebp
leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
xorl %ecx,%edx
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
+ shrl $7,%ebp
xorl %ecx,%eax
subl %ebp,%esi
andl $4278124286,%ebx
@@ -3178,30 +3186,30 @@ private_AES_set_decrypt_key:
movl %ebp,%edx
xorl %ebx,%ecx
movl %ecx,8(%edi)
- movl %edx,%esi
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ movl $2155905152,%ebp
+ andl %edx,%ebp
leal (%edx,%edx,1),%eax
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%eax
andl $454761243,%esi
- xorl %eax,%esi
- movl %esi,%eax
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%eax
+ movl $2155905152,%ebp
+ andl %eax,%ebp
leal (%eax,%eax,1),%ebx
+ movl %ebp,%esi
+ shrl $7,%ebp
subl %ebp,%esi
andl $4278124286,%ebx
andl $454761243,%esi
xorl %edx,%eax
- xorl %ebx,%esi
- movl %esi,%ebx
- andl $2155905152,%esi
- movl %esi,%ebp
- shrl $7,%ebp
+ xorl %esi,%ebx
+ movl $2155905152,%ebp
+ andl %ebx,%ebp
leal (%ebx,%ebx,1),%ecx
+ movl %ebp,%esi
+ shrl $7,%ebp
xorl %edx,%ebx
subl %ebp,%esi
andl $4278124286,%ecx
@@ -3234,4 +3242,4 @@ private_AES_set_decrypt_key:
.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89
.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/aesni-x86.s b/secure/lib/libcrypto/i386/aesni-x86.s
index e05096f..5b294bd 100644
--- a/secure/lib/libcrypto/i386/aesni-x86.s
+++ b/secure/lib/libcrypto/i386/aesni-x86.s
@@ -22,7 +22,10 @@ aesni_encrypt:
leal 16(%edx),%edx
jnz .L000enc1_loop_1
.byte 102,15,56,221,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_encrypt,.-.L_aesni_encrypt_begin
.globl aesni_decrypt
@@ -46,32 +49,90 @@ aesni_decrypt:
leal 16(%edx),%edx
jnz .L001dec1_loop_2
.byte 102,15,56,223,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
ret
.size aesni_decrypt,.-.L_aesni_decrypt_begin
+.type _aesni_encrypt2,@function
+.align 16
+_aesni_encrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L002enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L002enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ ret
+.size _aesni_encrypt2,.-_aesni_encrypt2
+.type _aesni_decrypt2,@function
+.align 16
+_aesni_decrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L003dec2_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L003dec2_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+ ret
+.size _aesni_decrypt2,.-_aesni_decrypt2
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L002enc3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L004enc3_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
- movups (%edx),%xmm0
- jnz .L002enc3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L004enc3_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -84,25 +145,26 @@ _aesni_encrypt3:
.align 16
_aesni_decrypt3:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
- movups (%edx),%xmm0
-.L003dec3_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L005dec3_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
- movups (%edx),%xmm0
- jnz .L003dec3_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L005dec3_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -116,27 +178,29 @@ _aesni_decrypt3:
_aesni_encrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L004enc4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L006enc4_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
.byte 102,15,56,220,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
- movups (%edx),%xmm0
- jnz .L004enc4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L006enc4_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -152,27 +216,29 @@ _aesni_encrypt4:
_aesni_decrypt4:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
- shrl $1,%ecx
- leal 32(%edx),%edx
+ shll $4,%ecx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
- movups (%edx),%xmm0
-.L005dec4_loop:
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L007dec4_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
.byte 102,15,56,222,233
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
- movups (%edx),%xmm0
- jnz .L005dec4_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L007dec4_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -187,45 +253,42 @@ _aesni_decrypt4:
.align 16
_aesni_encrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
-.byte 102,15,56,220,209
pxor %xmm0,%xmm4
-.byte 102,15,56,220,217
+.byte 102,15,56,220,209
pxor %xmm0,%xmm5
- decl %ecx
-.byte 102,15,56,220,225
pxor %xmm0,%xmm6
-.byte 102,15,56,220,233
+.byte 102,15,56,220,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,220,225
pxor %xmm0,%xmm7
-.byte 102,15,56,220,241
- movups (%edx),%xmm0
-.byte 102,15,56,220,249
- jmp .L_aesni_encrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L008_aesni_encrypt6_inner
.align 16
-.L006enc6_loop:
+.L009enc6_loop:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- decl %ecx
.byte 102,15,56,220,225
+.L008_aesni_encrypt6_inner:
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
-.align 16
.L_aesni_encrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
- leal 32(%edx),%edx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
- movups (%edx),%xmm0
- jnz .L006enc6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L009enc6_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
@@ -244,45 +307,42 @@ _aesni_encrypt6:
.align 16
_aesni_decrypt6:
movups (%edx),%xmm0
- shrl $1,%ecx
+ shll $4,%ecx
movups 16(%edx),%xmm1
- leal 32(%edx),%edx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
-.byte 102,15,56,222,209
pxor %xmm0,%xmm4
-.byte 102,15,56,222,217
+.byte 102,15,56,222,209
pxor %xmm0,%xmm5
- decl %ecx
-.byte 102,15,56,222,225
pxor %xmm0,%xmm6
-.byte 102,15,56,222,233
+.byte 102,15,56,222,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,222,225
pxor %xmm0,%xmm7
-.byte 102,15,56,222,241
- movups (%edx),%xmm0
-.byte 102,15,56,222,249
- jmp .L_aesni_decrypt6_enter
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L010_aesni_decrypt6_inner
.align 16
-.L007dec6_loop:
+.L011dec6_loop:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
- decl %ecx
.byte 102,15,56,222,225
+.L010_aesni_decrypt6_inner:
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
-.align 16
.L_aesni_decrypt6_enter:
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
- leal 32(%edx),%edx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
- movups (%edx),%xmm0
- jnz .L007dec6_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L011dec6_loop
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
@@ -312,14 +372,14 @@ aesni_ecb_encrypt:
movl 32(%esp),%edx
movl 36(%esp),%ebx
andl $-16,%eax
- jz .L008ecb_ret
+ jz .L012ecb_ret
movl 240(%edx),%ecx
testl %ebx,%ebx
- jz .L009ecb_decrypt
+ jz .L013ecb_decrypt
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L010ecb_enc_tail
+ jb .L014ecb_enc_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -328,9 +388,9 @@ aesni_ecb_encrypt:
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L011ecb_enc_loop6_enter
+ jmp .L015ecb_enc_loop6_enter
.align 16
-.L012ecb_enc_loop6:
+.L016ecb_enc_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -345,12 +405,12 @@ aesni_ecb_encrypt:
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L011ecb_enc_loop6_enter:
+.L015ecb_enc_loop6_enter:
call _aesni_encrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L012ecb_enc_loop6
+ jnc .L016ecb_enc_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -359,18 +419,18 @@ aesni_ecb_encrypt:
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L010ecb_enc_tail:
+ jz .L012ecb_ret
+.L014ecb_enc_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L013ecb_enc_one
+ jb .L017ecb_enc_one
movups 16(%esi),%xmm3
- je .L014ecb_enc_two
+ je .L018ecb_enc_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L015ecb_enc_three
+ jb .L019ecb_enc_three
movups 48(%esi),%xmm5
- je .L016ecb_enc_four
+ je .L020ecb_enc_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_encrypt6
@@ -379,50 +439,49 @@ aesni_ecb_encrypt:
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L013ecb_enc_one:
+.L017ecb_enc_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L017enc1_loop_3:
+.L021enc1_loop_3:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L017enc1_loop_3
+ jnz .L021enc1_loop_3
.byte 102,15,56,221,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L014ecb_enc_two:
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+.L018ecb_enc_two:
+ call _aesni_encrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L015ecb_enc_three:
+.L019ecb_enc_three:
call _aesni_encrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L016ecb_enc_four:
+.L020ecb_enc_four:
call _aesni_encrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L009ecb_decrypt:
+.L013ecb_decrypt:
movl %edx,%ebp
movl %ecx,%ebx
cmpl $96,%eax
- jb .L018ecb_dec_tail
+ jb .L022ecb_dec_tail
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -431,9 +490,9 @@ aesni_ecb_encrypt:
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
subl $96,%eax
- jmp .L019ecb_dec_loop6_enter
+ jmp .L023ecb_dec_loop6_enter
.align 16
-.L020ecb_dec_loop6:
+.L024ecb_dec_loop6:
movups %xmm2,(%edi)
movdqu (%esi),%xmm2
movups %xmm3,16(%edi)
@@ -448,12 +507,12 @@ aesni_ecb_encrypt:
leal 96(%edi),%edi
movdqu 80(%esi),%xmm7
leal 96(%esi),%esi
-.L019ecb_dec_loop6_enter:
+.L023ecb_dec_loop6_enter:
call _aesni_decrypt6
movl %ebp,%edx
movl %ebx,%ecx
subl $96,%eax
- jnc .L020ecb_dec_loop6
+ jnc .L024ecb_dec_loop6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
@@ -462,18 +521,18 @@ aesni_ecb_encrypt:
movups %xmm7,80(%edi)
leal 96(%edi),%edi
addl $96,%eax
- jz .L008ecb_ret
-.L018ecb_dec_tail:
+ jz .L012ecb_ret
+.L022ecb_dec_tail:
movups (%esi),%xmm2
cmpl $32,%eax
- jb .L021ecb_dec_one
+ jb .L025ecb_dec_one
movups 16(%esi),%xmm3
- je .L022ecb_dec_two
+ je .L026ecb_dec_two
movups 32(%esi),%xmm4
cmpl $64,%eax
- jb .L023ecb_dec_three
+ jb .L027ecb_dec_three
movups 48(%esi),%xmm5
- je .L024ecb_dec_four
+ je .L028ecb_dec_four
movups 64(%esi),%xmm6
xorps %xmm7,%xmm7
call _aesni_decrypt6
@@ -482,44 +541,51 @@ aesni_ecb_encrypt:
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L021ecb_dec_one:
+.L025ecb_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L025dec1_loop_4:
+.L029dec1_loop_4:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L025dec1_loop_4
+ jnz .L029dec1_loop_4
.byte 102,15,56,223,209
movups %xmm2,(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L022ecb_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L026ecb_dec_two:
+ call _aesni_decrypt2
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L023ecb_dec_three:
+.L027ecb_dec_three:
call _aesni_decrypt3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L008ecb_ret
+ jmp .L012ecb_ret
.align 16
-.L024ecb_dec_four:
+.L028ecb_dec_four:
call _aesni_decrypt4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L008ecb_ret:
+.L012ecb_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -558,48 +624,56 @@ aesni_ccm64_encrypt_blocks:
movl %ebp,20(%esp)
movl %ebp,24(%esp)
movl %ebp,28(%esp)
- shrl $1,%ecx
+ shll $4,%ecx
+ movl $16,%ebx
leal (%edx),%ebp
movdqa (%esp),%xmm5
movdqa %xmm7,%xmm2
- movl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ subl %ecx,%ebx
.byte 102,15,56,0,253
-.L026ccm64_enc_outer:
+.L030ccm64_enc_outer:
movups (%ebp),%xmm0
movl %ebx,%ecx
movups (%esi),%xmm6
xorps %xmm0,%xmm2
movups 16(%ebp),%xmm1
xorps %xmm6,%xmm0
- leal 32(%ebp),%edx
xorps %xmm0,%xmm3
- movups (%edx),%xmm0
-.L027ccm64_enc2_loop:
+ movups 32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L027ccm64_enc2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L031ccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
paddq 16(%esp),%xmm7
+ decl %eax
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- decl %eax
leal 16(%esi),%esi
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
movups %xmm6,(%edi)
- leal 16(%edi),%edi
.byte 102,15,56,0,213
- jnz .L026ccm64_enc_outer
+ leal 16(%edi),%edi
+ jnz .L030ccm64_enc_outer
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -647,71 +721,82 @@ aesni_ccm64_decrypt_blocks:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L028enc1_loop_5:
+.L032enc1_loop_5:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L028enc1_loop_5
+ jnz .L032enc1_loop_5
.byte 102,15,56,221,209
+ shll $4,%ebx
+ movl $16,%ecx
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
leal 16(%esi),%esi
- jmp .L029ccm64_dec_outer
+ subl %ebx,%ecx
+ leal 32(%ebp,%ebx,1),%edx
+ movl %ecx,%ebx
+ jmp .L033ccm64_dec_outer
.align 16
-.L029ccm64_dec_outer:
+.L033ccm64_dec_outer:
xorps %xmm2,%xmm6
movdqa %xmm7,%xmm2
- movl %ebx,%ecx
movups %xmm6,(%edi)
leal 16(%edi),%edi
.byte 102,15,56,0,213
subl $1,%eax
- jz .L030ccm64_dec_break
+ jz .L034ccm64_dec_break
movups (%ebp),%xmm0
- shrl $1,%ecx
+ movl %ebx,%ecx
movups 16(%ebp),%xmm1
xorps %xmm0,%xmm6
- leal 32(%ebp),%edx
xorps %xmm0,%xmm2
xorps %xmm6,%xmm3
- movups (%edx),%xmm0
-.L031ccm64_dec2_loop:
+ movups 32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
.byte 102,15,56,220,209
- decl %ecx
.byte 102,15,56,220,217
- movups 16(%edx),%xmm1
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
.byte 102,15,56,220,208
- leal 32(%edx),%edx
.byte 102,15,56,220,216
- movups (%edx),%xmm0
- jnz .L031ccm64_dec2_loop
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L035ccm64_dec2_loop
movups (%esi),%xmm6
paddq 16(%esp),%xmm7
.byte 102,15,56,220,209
.byte 102,15,56,220,217
- leal 16(%esi),%esi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
- jmp .L029ccm64_dec_outer
+ leal 16(%esi),%esi
+ jmp .L033ccm64_dec_outer
.align 16
-.L030ccm64_dec_break:
+.L034ccm64_dec_break:
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movups (%edx),%xmm0
movups 16(%edx),%xmm1
xorps %xmm0,%xmm6
leal 32(%edx),%edx
xorps %xmm6,%xmm3
-.L032enc1_loop_6:
+.L036enc1_loop_6:
.byte 102,15,56,220,217
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L032enc1_loop_6
+ jnz .L036enc1_loop_6
.byte 102,15,56,221,217
movl 48(%esp),%esp
movl 40(%esp),%edi
movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
popl %edi
popl %esi
popl %ebx
@@ -737,7 +822,7 @@ aesni_ctr32_encrypt_blocks:
andl $-16,%esp
movl %ebp,80(%esp)
cmpl $1,%eax
- je .L033ctr32_one_shortcut
+ je .L037ctr32_one_shortcut
movdqu (%ebx),%xmm7
movl $202182159,(%esp)
movl $134810123,4(%esp)
@@ -753,63 +838,59 @@ aesni_ctr32_encrypt_blocks:
.byte 102,15,58,34,253,3
movl 240(%edx),%ecx
bswap %ebx
- pxor %xmm1,%xmm1
pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
movdqa (%esp),%xmm2
-.byte 102,15,58,34,203,0
+.byte 102,15,58,34,195,0
leal 3(%ebx),%ebp
-.byte 102,15,58,34,197,0
+.byte 102,15,58,34,205,0
incl %ebx
-.byte 102,15,58,34,203,1
+.byte 102,15,58,34,195,1
incl %ebp
-.byte 102,15,58,34,197,1
+.byte 102,15,58,34,205,1
incl %ebx
-.byte 102,15,58,34,203,2
+.byte 102,15,58,34,195,2
incl %ebp
-.byte 102,15,58,34,197,2
- movdqa %xmm1,48(%esp)
-.byte 102,15,56,0,202
- movdqa %xmm0,64(%esp)
+.byte 102,15,58,34,205,2
+ movdqa %xmm0,48(%esp)
.byte 102,15,56,0,194
- pshufd $192,%xmm1,%xmm2
- pshufd $128,%xmm1,%xmm3
+ movdqu (%edx),%xmm6
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
+ pshufd $192,%xmm0,%xmm2
+ pshufd $128,%xmm0,%xmm3
cmpl $6,%eax
- jb .L034ctr32_tail
+ jb .L038ctr32_tail
+ pxor %xmm6,%xmm7
+ shll $4,%ecx
+ movl $16,%ebx
movdqa %xmm7,32(%esp)
- shrl $1,%ecx
movl %edx,%ebp
- movl %ecx,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
subl $6,%eax
- jmp .L035ctr32_loop6
-.align 16
-.L035ctr32_loop6:
- pshufd $64,%xmm1,%xmm4
- movdqa 32(%esp),%xmm1
- pshufd $192,%xmm0,%xmm5
- por %xmm1,%xmm2
- pshufd $128,%xmm0,%xmm6
- por %xmm1,%xmm3
- pshufd $64,%xmm0,%xmm7
- por %xmm1,%xmm4
- por %xmm1,%xmm5
- por %xmm1,%xmm6
- por %xmm1,%xmm7
- movups (%ebp),%xmm0
- movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
- decl %ecx
+ jmp .L039ctr32_loop6
+.align 16
+.L039ctr32_loop6:
+ pshufd $64,%xmm0,%xmm4
+ movdqa 32(%esp),%xmm0
+ pshufd $192,%xmm1,%xmm5
pxor %xmm0,%xmm2
+ pshufd $128,%xmm1,%xmm6
pxor %xmm0,%xmm3
-.byte 102,15,56,220,209
+ pshufd $64,%xmm1,%xmm7
+ movups 16(%ebp),%xmm1
pxor %xmm0,%xmm4
-.byte 102,15,56,220,217
pxor %xmm0,%xmm5
-.byte 102,15,56,220,225
+.byte 102,15,56,220,209
pxor %xmm0,%xmm6
-.byte 102,15,56,220,233
pxor %xmm0,%xmm7
+.byte 102,15,56,220,217
+ movups 32(%ebp),%xmm0
+ movl %ebx,%ecx
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movups (%esi),%xmm1
@@ -820,51 +901,51 @@ aesni_ctr32_encrypt_blocks:
movups %xmm2,(%edi)
movdqa 16(%esp),%xmm0
xorps %xmm1,%xmm4
- movdqa 48(%esp),%xmm1
+ movdqa 64(%esp),%xmm1
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
paddd %xmm0,%xmm1
- paddd 64(%esp),%xmm0
+ paddd 48(%esp),%xmm0
movdqa (%esp),%xmm2
movups 48(%esi),%xmm3
movups 64(%esi),%xmm4
xorps %xmm3,%xmm5
movups 80(%esi),%xmm3
leal 96(%esi),%esi
- movdqa %xmm1,48(%esp)
-.byte 102,15,56,0,202
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
xorps %xmm4,%xmm6
movups %xmm5,48(%edi)
xorps %xmm3,%xmm7
- movdqa %xmm0,64(%esp)
-.byte 102,15,56,0,194
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
movups %xmm6,64(%edi)
- pshufd $192,%xmm1,%xmm2
+ pshufd $192,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
- movl %ebx,%ecx
- pshufd $128,%xmm1,%xmm3
+ pshufd $128,%xmm0,%xmm3
subl $6,%eax
- jnc .L035ctr32_loop6
+ jnc .L039ctr32_loop6
addl $6,%eax
- jz .L036ctr32_ret
+ jz .L040ctr32_ret
+ movdqu (%ebp),%xmm7
movl %ebp,%edx
- leal 1(,%ecx,2),%ecx
- movdqa 32(%esp),%xmm7
-.L034ctr32_tail:
+ pxor 32(%esp),%xmm7
+ movl 240(%ebp),%ecx
+.L038ctr32_tail:
por %xmm7,%xmm2
cmpl $2,%eax
- jb .L037ctr32_one
- pshufd $64,%xmm1,%xmm4
+ jb .L041ctr32_one
+ pshufd $64,%xmm0,%xmm4
por %xmm7,%xmm3
- je .L038ctr32_two
- pshufd $192,%xmm0,%xmm5
+ je .L042ctr32_two
+ pshufd $192,%xmm1,%xmm5
por %xmm7,%xmm4
cmpl $4,%eax
- jb .L039ctr32_three
- pshufd $128,%xmm0,%xmm6
+ jb .L043ctr32_three
+ pshufd $128,%xmm1,%xmm6
por %xmm7,%xmm5
- je .L040ctr32_four
+ je .L044ctr32_four
por %xmm7,%xmm6
call _aesni_encrypt6
movups (%esi),%xmm1
@@ -882,39 +963,39 @@ aesni_ctr32_encrypt_blocks:
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L033ctr32_one_shortcut:
+.L037ctr32_one_shortcut:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
-.L037ctr32_one:
+.L041ctr32_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L041enc1_loop_7:
+.L045enc1_loop_7:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L041enc1_loop_7
+ jnz .L045enc1_loop_7
.byte 102,15,56,221,209
movups (%esi),%xmm6
xorps %xmm2,%xmm6
movups %xmm6,(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L038ctr32_two:
- call _aesni_encrypt3
+.L042ctr32_two:
+ call _aesni_encrypt2
movups (%esi),%xmm5
movups 16(%esi),%xmm6
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L039ctr32_three:
+.L043ctr32_three:
call _aesni_encrypt3
movups (%esi),%xmm5
movups 16(%esi),%xmm6
@@ -925,9 +1006,9 @@ aesni_ctr32_encrypt_blocks:
xorps %xmm7,%xmm4
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
- jmp .L036ctr32_ret
+ jmp .L040ctr32_ret
.align 16
-.L040ctr32_four:
+.L044ctr32_four:
call _aesni_encrypt4
movups (%esi),%xmm6
movups 16(%esi),%xmm7
@@ -941,7 +1022,18 @@ aesni_ctr32_encrypt_blocks:
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
-.L036ctr32_ret:
+.L040ctr32_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
movl 80(%esp),%esp
popl %edi
popl %esi
@@ -966,12 +1058,12 @@ aesni_xts_encrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L042enc1_loop_8:
+.L046enc1_loop_8:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L042enc1_loop_8
+ jnz .L046enc1_loop_8
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -995,12 +1087,14 @@ aesni_xts_encrypt:
movl %edx,%ebp
movl %ecx,%ebx
subl $96,%eax
- jc .L043xts_enc_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L044xts_enc_loop6
+ jc .L047xts_enc_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L048xts_enc_loop6
.align 16
-.L044xts_enc_loop6:
+.L048xts_enc_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -1036,6 +1130,7 @@ aesni_xts_encrypt:
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -1051,19 +1146,17 @@ aesni_xts_encrypt:
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
-.byte 102,15,56,220,209
pxor 32(%esp),%xmm4
-.byte 102,15,56,220,217
+.byte 102,15,56,220,209
pxor 48(%esp),%xmm5
- decl %ecx
-.byte 102,15,56,220,225
pxor 64(%esp),%xmm6
-.byte 102,15,56,220,233
+.byte 102,15,56,220,217
pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
.byte 102,15,56,220,241
- movups (%edx),%xmm0
.byte 102,15,56,220,249
call .L_aesni_encrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1088,26 +1181,25 @@ aesni_xts_encrypt:
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L044xts_enc_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L048xts_enc_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L043xts_enc_short:
+.L047xts_enc_short:
addl $96,%eax
- jz .L045xts_enc_done6x
+ jz .L049xts_enc_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L046xts_enc_one
+ jb .L050xts_enc_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L047xts_enc_two
+ je .L051xts_enc_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -1116,7 +1208,7 @@ aesni_xts_encrypt:
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L048xts_enc_three
+ jb .L052xts_enc_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -1126,7 +1218,7 @@ aesni_xts_encrypt:
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L049xts_enc_four
+ je .L053xts_enc_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -1158,9 +1250,9 @@ aesni_xts_encrypt:
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L046xts_enc_one:
+.L050xts_enc_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -1168,37 +1260,36 @@ aesni_xts_encrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L051enc1_loop_9:
+.L055enc1_loop_9:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L051enc1_loop_9
+ jnz .L055enc1_loop_9
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L047xts_enc_two:
+.L051xts_enc_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- xorps %xmm4,%xmm4
- call _aesni_encrypt3
+ call _aesni_encrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L048xts_enc_three:
+.L052xts_enc_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1216,9 +1307,9 @@ aesni_xts_encrypt:
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L049xts_enc_four:
+.L053xts_enc_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1240,28 +1331,28 @@ aesni_xts_encrypt:
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L050xts_enc_done
+ jmp .L054xts_enc_done
.align 16
-.L045xts_enc_done6x:
+.L049xts_enc_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
movdqa %xmm1,%xmm5
movl %eax,112(%esp)
- jmp .L053xts_enc_steal
+ jmp .L057xts_enc_steal
.align 16
-.L050xts_enc_done:
+.L054xts_enc_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L052xts_enc_ret
+ jz .L056xts_enc_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm5
paddq %xmm1,%xmm1
pand 96(%esp),%xmm5
pxor %xmm1,%xmm5
-.L053xts_enc_steal:
+.L057xts_enc_steal:
movzbl (%esi),%ecx
movzbl -16(%edi),%edx
leal 1(%esi),%esi
@@ -1269,7 +1360,7 @@ aesni_xts_encrypt:
movb %dl,(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L053xts_enc_steal
+ jnz .L057xts_enc_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -1279,16 +1370,30 @@ aesni_xts_encrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L054enc1_loop_10:
+.L058enc1_loop_10:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L054enc1_loop_10
+ jnz .L058enc1_loop_10
.byte 102,15,56,221,209
xorps %xmm5,%xmm2
movups %xmm2,-16(%edi)
-.L052xts_enc_ret:
+.L056xts_enc_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -1313,12 +1418,12 @@ aesni_xts_decrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L055enc1_loop_11:
+.L059enc1_loop_11:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L055enc1_loop_11
+ jnz .L059enc1_loop_11
.byte 102,15,56,221,209
movl 20(%esp),%esi
movl 24(%esp),%edi
@@ -1347,12 +1452,14 @@ aesni_xts_decrypt:
pcmpgtd %xmm1,%xmm0
andl $-16,%eax
subl $96,%eax
- jc .L056xts_dec_short
- shrl $1,%ecx
- movl %ecx,%ebx
- jmp .L057xts_dec_loop6
+ jc .L060xts_dec_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L061xts_dec_loop6
.align 16
-.L057xts_dec_loop6:
+.L061xts_dec_loop6:
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
@@ -1388,6 +1495,7 @@ aesni_xts_decrypt:
pand %xmm3,%xmm7
movups (%esi),%xmm2
pxor %xmm1,%xmm7
+ movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2
movdqu 32(%esi),%xmm4
@@ -1403,19 +1511,17 @@ aesni_xts_decrypt:
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
- leal 32(%ebp),%edx
pxor 16(%esp),%xmm3
-.byte 102,15,56,222,209
pxor 32(%esp),%xmm4
-.byte 102,15,56,222,217
+.byte 102,15,56,222,209
pxor 48(%esp),%xmm5
- decl %ecx
-.byte 102,15,56,222,225
pxor 64(%esp),%xmm6
-.byte 102,15,56,222,233
+.byte 102,15,56,222,217
pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
.byte 102,15,56,222,241
- movups (%edx),%xmm0
.byte 102,15,56,222,249
call .L_aesni_decrypt6_enter
movdqa 80(%esp),%xmm1
@@ -1440,26 +1546,25 @@ aesni_xts_decrypt:
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
- movl %ebx,%ecx
pxor %xmm2,%xmm1
subl $96,%eax
- jnc .L057xts_dec_loop6
- leal 1(,%ecx,2),%ecx
+ jnc .L061xts_dec_loop6
+ movl 240(%ebp),%ecx
movl %ebp,%edx
movl %ecx,%ebx
-.L056xts_dec_short:
+.L060xts_dec_short:
addl $96,%eax
- jz .L058xts_dec_done6x
+ jz .L062xts_dec_done6x
movdqa %xmm1,%xmm5
cmpl $32,%eax
- jb .L059xts_dec_one
+ jb .L063xts_dec_one
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
- je .L060xts_dec_two
+ je .L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
@@ -1468,7 +1573,7 @@ aesni_xts_decrypt:
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
- jb .L061xts_dec_three
+ jb .L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
@@ -1478,7 +1583,7 @@ aesni_xts_decrypt:
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)
movdqa %xmm6,16(%esp)
- je .L062xts_dec_four
+ je .L066xts_dec_four
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
@@ -1510,9 +1615,9 @@ aesni_xts_decrypt:
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L059xts_dec_one:
+.L063xts_dec_one:
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
@@ -1520,36 +1625,36 @@ aesni_xts_decrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L064dec1_loop_12:
+.L068dec1_loop_12:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L064dec1_loop_12
+ jnz .L068dec1_loop_12
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L060xts_dec_two:
+.L064xts_dec_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
- call _aesni_decrypt3
+ call _aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L061xts_dec_three:
+.L065xts_dec_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1567,9 +1672,9 @@ aesni_xts_decrypt:
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L062xts_dec_four:
+.L066xts_dec_four:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
@@ -1591,20 +1696,20 @@ aesni_xts_decrypt:
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
- jmp .L063xts_dec_done
+ jmp .L067xts_dec_done
.align 16
-.L058xts_dec_done6x:
+.L062xts_dec_done6x:
movl 112(%esp),%eax
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
movl %eax,112(%esp)
- jmp .L066xts_dec_only_one_more
+ jmp .L070xts_dec_only_one_more
.align 16
-.L063xts_dec_done:
+.L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
- jz .L065xts_dec_ret
+ jz .L069xts_dec_ret
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
@@ -1614,7 +1719,7 @@ aesni_xts_decrypt:
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
-.L066xts_dec_only_one_more:
+.L070xts_dec_only_one_more:
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
@@ -1628,16 +1733,16 @@ aesni_xts_decrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L067dec1_loop_13:
+.L071dec1_loop_13:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L067dec1_loop_13
+ jnz .L071dec1_loop_13
.byte 102,15,56,223,209
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
-.L068xts_dec_steal:
+.L072xts_dec_steal:
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
@@ -1645,7 +1750,7 @@ aesni_xts_decrypt:
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
- jnz .L068xts_dec_steal
+ jnz .L072xts_dec_steal
subl 112(%esp),%edi
movl %ebp,%edx
movl %ebx,%ecx
@@ -1655,16 +1760,30 @@ aesni_xts_decrypt:
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L069dec1_loop_14:
+.L073dec1_loop_14:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L069dec1_loop_14
+ jnz .L073dec1_loop_14
.byte 102,15,56,223,209
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
-.L065xts_dec_ret:
+.L069xts_dec_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
@@ -1690,7 +1809,7 @@ aesni_cbc_encrypt:
movl 32(%esp),%edx
movl 36(%esp),%ebp
testl %eax,%eax
- jz .L070cbc_abort
+ jz .L074cbc_abort
cmpl $0,40(%esp)
xchgl %esp,%ebx
movups (%ebp),%xmm7
@@ -1698,14 +1817,14 @@ aesni_cbc_encrypt:
movl %edx,%ebp
movl %ebx,16(%esp)
movl %ecx,%ebx
- je .L071cbc_decrypt
+ je .L075cbc_decrypt
movaps %xmm7,%xmm2
cmpl $16,%eax
- jb .L072cbc_enc_tail
+ jb .L076cbc_enc_tail
subl $16,%eax
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L073cbc_enc_loop:
+.L077cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
movups (%edx),%xmm0
@@ -1713,24 +1832,25 @@ aesni_cbc_encrypt:
xorps %xmm0,%xmm7
leal 32(%edx),%edx
xorps %xmm7,%xmm2
-.L074enc1_loop_15:
+.L078enc1_loop_15:
.byte 102,15,56,220,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L074enc1_loop_15
+ jnz .L078enc1_loop_15
.byte 102,15,56,221,209
movl %ebx,%ecx
movl %ebp,%edx
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
- jnc .L073cbc_enc_loop
+ jnc .L077cbc_enc_loop
addl $16,%eax
- jnz .L072cbc_enc_tail
+ jnz .L076cbc_enc_tail
movaps %xmm2,%xmm7
- jmp .L075cbc_ret
-.L072cbc_enc_tail:
+ pxor %xmm2,%xmm2
+ jmp .L079cbc_ret
+.L076cbc_enc_tail:
movl %eax,%ecx
.long 2767451785
movl $16,%ecx
@@ -1741,20 +1861,20 @@ aesni_cbc_encrypt:
movl %ebx,%ecx
movl %edi,%esi
movl %ebp,%edx
- jmp .L073cbc_enc_loop
+ jmp .L077cbc_enc_loop
.align 16
-.L071cbc_decrypt:
+.L075cbc_decrypt:
cmpl $80,%eax
- jbe .L076cbc_dec_tail
+ jbe .L080cbc_dec_tail
movaps %xmm7,(%esp)
subl $80,%eax
- jmp .L077cbc_dec_loop6_enter
+ jmp .L081cbc_dec_loop6_enter
.align 16
-.L078cbc_dec_loop6:
+.L082cbc_dec_loop6:
movaps %xmm0,(%esp)
movups %xmm7,(%edi)
leal 16(%edi),%edi
-.L077cbc_dec_loop6_enter:
+.L081cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
@@ -1784,28 +1904,28 @@ aesni_cbc_encrypt:
movups %xmm6,64(%edi)
leal 80(%edi),%edi
subl $96,%eax
- ja .L078cbc_dec_loop6
+ ja .L082cbc_dec_loop6
movaps %xmm7,%xmm2
movaps %xmm0,%xmm7
addl $80,%eax
- jle .L079cbc_dec_tail_collected
+ jle .L083cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
-.L076cbc_dec_tail:
+.L080cbc_dec_tail:
movups (%esi),%xmm2
movaps %xmm2,%xmm6
cmpl $16,%eax
- jbe .L080cbc_dec_one
+ jbe .L084cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
- jbe .L081cbc_dec_two
+ jbe .L085cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
- jbe .L082cbc_dec_three
+ jbe .L086cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
- jbe .L083cbc_dec_four
+ jbe .L087cbc_dec_four
movups 64(%esi),%xmm6
movaps %xmm7,(%esp)
movups (%esi),%xmm2
@@ -1823,56 +1943,62 @@ aesni_cbc_encrypt:
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
+ pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2
+ pxor %xmm6,%xmm6
subl $80,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L080cbc_dec_one:
+.L084cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
-.L084dec1_loop_16:
+.L089dec1_loop_16:
.byte 102,15,56,222,209
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
- jnz .L084dec1_loop_16
+ jnz .L089dec1_loop_16
.byte 102,15,56,223,209
xorps %xmm7,%xmm2
movaps %xmm6,%xmm7
subl $16,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L081cbc_dec_two:
- xorps %xmm4,%xmm4
- call _aesni_decrypt3
+.L085cbc_dec_two:
+ call _aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2
+ pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7
subl $32,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L082cbc_dec_three:
+.L086cbc_dec_three:
call _aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2
+ pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7
subl $48,%eax
- jmp .L079cbc_dec_tail_collected
+ jmp .L088cbc_dec_tail_collected
.align 16
-.L083cbc_dec_four:
+.L087cbc_dec_four:
call _aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
@@ -1882,28 +2008,44 @@ aesni_cbc_encrypt:
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2
+ pxor %xmm5,%xmm5
subl $64,%eax
-.L079cbc_dec_tail_collected:
+ jmp .L088cbc_dec_tail_collected
+.align 16
+.L083cbc_dec_clear_tail_collected:
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+.L088cbc_dec_tail_collected:
andl $15,%eax
- jnz .L085cbc_dec_tail_partial
+ jnz .L090cbc_dec_tail_partial
movups %xmm2,(%edi)
- jmp .L075cbc_ret
+ pxor %xmm0,%xmm0
+ jmp .L079cbc_ret
.align 16
-.L085cbc_dec_tail_partial:
+.L090cbc_dec_tail_partial:
movaps %xmm2,(%esp)
+ pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx
.long 2767451785
-.L075cbc_ret:
+ movdqa %xmm2,(%esp)
+.L079cbc_ret:
movl 16(%esp),%esp
movl 36(%esp),%ebp
+ pxor %xmm2,%xmm2
+ pxor %xmm1,%xmm1
movups %xmm7,(%ebp)
-.L070cbc_abort:
+ pxor %xmm7,%xmm7
+.L074cbc_abort:
popl %edi
popl %esi
popl %ebx
@@ -1913,52 +2055,62 @@ aesni_cbc_encrypt:
.type _aesni_set_encrypt_key,@function
.align 16
_aesni_set_encrypt_key:
+ pushl %ebp
+ pushl %ebx
testl %eax,%eax
- jz .L086bad_pointer
+ jz .L091bad_pointer
testl %edx,%edx
- jz .L086bad_pointer
+ jz .L091bad_pointer
+ call .L092pic
+.L092pic:
+ popl %ebx
+ leal .Lkey_const-.L092pic(%ebx),%ebx
+ leal OPENSSL_ia32cap_P,%ebp
movups (%eax),%xmm0
xorps %xmm4,%xmm4
+ movl 4(%ebp),%ebp
leal 16(%edx),%edx
+ andl $268437504,%ebp
cmpl $256,%ecx
- je .L08714rounds
+ je .L09314rounds
cmpl $192,%ecx
- je .L08812rounds
+ je .L09412rounds
cmpl $128,%ecx
- jne .L089bad_keybits
+ jne .L095bad_keybits
.align 16
-.L09010rounds:
+.L09610rounds:
+ cmpl $268435456,%ebp
+ je .L09710rounds_alt
movl $9,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,200,1
- call .L091key_128_cold
+ call .L098key_128_cold
.byte 102,15,58,223,200,2
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,4
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,8
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,16
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,32
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,64
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,128
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,27
- call .L092key_128
+ call .L099key_128
.byte 102,15,58,223,200,54
- call .L092key_128
+ call .L099key_128
movups %xmm0,(%edx)
movl %ecx,80(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L092key_128:
+.L099key_128:
movups %xmm0,(%edx)
leal 16(%edx),%edx
-.L091key_128_cold:
+.L098key_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -1967,38 +2119,91 @@ _aesni_set_encrypt_key:
xorps %xmm1,%xmm0
ret
.align 16
-.L08812rounds:
+.L09710rounds_alt:
+ movdqa (%ebx),%xmm5
+ movl $8,%ecx
+ movdqa 32(%ebx),%xmm4
+ movdqa %xmm0,%xmm2
+ movdqu %xmm0,-16(%edx)
+.L101loop_key128:
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ leal 16(%edx),%edx
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,-16(%edx)
+ movdqa %xmm0,%xmm2
+ decl %ecx
+ jnz .L101loop_key128
+ movdqa 48(%ebx),%xmm4
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ movdqa %xmm0,%xmm2
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,16(%edx)
+ movl $9,%ecx
+ movl %ecx,96(%edx)
+ jmp .L100good_key
+.align 16
+.L09412rounds:
movq 16(%eax),%xmm2
+ cmpl $268435456,%ebp
+ je .L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)
.byte 102,15,58,223,202,1
- call .L093key_192a_cold
+ call .L103key_192a_cold
.byte 102,15,58,223,202,2
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,4
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,8
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,16
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,32
- call .L094key_192b
+ call .L104key_192b
.byte 102,15,58,223,202,64
- call .L095key_192a
+ call .L105key_192a
.byte 102,15,58,223,202,128
- call .L094key_192b
+ call .L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)
- xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L095key_192a:
+.L105key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 16
-.L093key_192a_cold:
+.L103key_192a_cold:
movaps %xmm2,%xmm5
-.L096key_192b_warm:
+.L106key_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
@@ -2012,56 +2217,90 @@ _aesni_set_encrypt_key:
pxor %xmm3,%xmm2
ret
.align 16
-.L094key_192b:
+.L104key_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
- jmp .L096key_192b_warm
+ jmp .L106key_192b_warm
+.align 16
+.L10212rounds_alt:
+ movdqa 16(%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $8,%ecx
+ movdqu %xmm0,-16(%edx)
+.L107loop_key192:
+ movq %xmm2,(%edx)
+ movdqa %xmm2,%xmm1
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ pslld $1,%xmm4
+ leal 24(%edx),%edx
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm2
+ movdqu %xmm0,-16(%edx)
+ decl %ecx
+ jnz .L107loop_key192
+ movl $11,%ecx
+ movl %ecx,32(%edx)
+ jmp .L100good_key
.align 16
-.L08714rounds:
+.L09314rounds:
movups 16(%eax),%xmm2
- movl $13,%ecx
leal 16(%edx),%edx
+ cmpl $268435456,%ebp
+ je .L10814rounds_alt
+ movl $13,%ecx
movups %xmm0,-32(%edx)
movups %xmm2,-16(%edx)
.byte 102,15,58,223,202,1
- call .L097key_256a_cold
+ call .L109key_256a_cold
.byte 102,15,58,223,200,1
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,2
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,2
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,4
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,4
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,8
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,8
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,16
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,16
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,32
- call .L099key_256a
+ call .L111key_256a
.byte 102,15,58,223,200,32
- call .L098key_256b
+ call .L110key_256b
.byte 102,15,58,223,202,64
- call .L099key_256a
+ call .L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)
xorl %eax,%eax
- ret
+ jmp .L100good_key
.align 16
-.L099key_256a:
+.L111key_256a:
movups %xmm2,(%edx)
leal 16(%edx),%edx
-.L097key_256a_cold:
+.L109key_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
@@ -2070,7 +2309,7 @@ _aesni_set_encrypt_key:
xorps %xmm1,%xmm0
ret
.align 16
-.L098key_256b:
+.L110key_256b:
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
@@ -2080,13 +2319,70 @@ _aesni_set_encrypt_key:
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
+.align 16
+.L10814rounds_alt:
+ movdqa (%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $7,%ecx
+ movdqu %xmm0,-32(%edx)
+ movdqa %xmm2,%xmm1
+ movdqu %xmm2,-16(%edx)
+.L112loop_key256:
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pslld $1,%xmm4
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ decl %ecx
+ jz .L113done_key256
+ pshufd $255,%xmm0,%xmm2
+ pxor %xmm3,%xmm3
+.byte 102,15,56,221,211
+ movdqa %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm3,%xmm1
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,16(%edx)
+ leal 32(%edx),%edx
+ movdqa %xmm2,%xmm1
+ jmp .L112loop_key256
+.L113done_key256:
+ movl $13,%ecx
+ movl %ecx,16(%edx)
+.L100good_key:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorl %eax,%eax
+ popl %ebx
+ popl %ebp
+ ret
.align 4
-.L086bad_pointer:
+.L091bad_pointer:
movl $-1,%eax
+ popl %ebx
+ popl %ebp
ret
.align 4
-.L089bad_keybits:
+.L095bad_keybits:
+ pxor %xmm0,%xmm0
movl $-2,%eax
+ popl %ebx
+ popl %ebp
ret
.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
.globl aesni_set_encrypt_key
@@ -2112,7 +2408,7 @@ aesni_set_decrypt_key:
movl 12(%esp),%edx
shll $4,%ecx
testl %eax,%eax
- jnz .L100dec_key_ret
+ jnz .L114dec_key_ret
leal 16(%edx,%ecx,1),%eax
movups (%edx),%xmm0
movups (%eax),%xmm1
@@ -2120,7 +2416,7 @@ aesni_set_decrypt_key:
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
-.L101dec_key_inverse:
+.L115dec_key_inverse:
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192
@@ -2130,15 +2426,24 @@ aesni_set_decrypt_key:
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
- ja .L101dec_key_inverse
+ ja .L115dec_key_inverse
movups (%edx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%edx)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
xorl %eax,%eax
-.L100dec_key_ret:
+.L114dec_key_ret:
ret
.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.align 64
+.Lkey_const:
+.long 202313229,202313229,202313229,202313229
+.long 67569157,67569157,67569157,67569157
+.long 1,1,1,1
+.long 27,27,27,27
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/bn-586.s b/secure/lib/libcrypto/i386/bn-586.s
index b40296e..3200c6d 100644
--- a/secure/lib/libcrypto/i386/bn-586.s
+++ b/secure/lib/libcrypto/i386/bn-586.s
@@ -1519,4 +1519,4 @@ bn_sub_part_words:
popl %ebp
ret
.size bn_sub_part_words,.-.L_bn_sub_part_words_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/cast-586.s b/secure/lib/libcrypto/i386/cast-586.s
deleted file mode 100644
index 2d65735..0000000
--- a/secure/lib/libcrypto/i386/cast-586.s
+++ /dev/null
@@ -1,934 +0,0 @@
- # $FreeBSD$
-.file "cast-586.s"
-.text
-.globl CAST_encrypt
-.type CAST_encrypt,@function
-.align 16
-CAST_encrypt:
-.L_CAST_encrypt_begin:
-
- pushl %ebp
- pushl %ebx
- movl 12(%esp),%ebx
- movl 16(%esp),%ebp
- pushl %esi
- pushl %edi
-
- movl (%ebx),%edi
- movl 4(%ebx),%esi
-
- movl 128(%ebp),%eax
- pushl %eax
- xorl %eax,%eax
-
- movl (%ebp),%edx
- movl 4(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 8(%ebp),%edx
- movl 12(%ebp),%ecx
- xorl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 16(%ebp),%edx
- movl 20(%ebp),%ecx
- subl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 24(%ebp),%edx
- movl 28(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 32(%ebp),%edx
- movl 36(%ebp),%ecx
- xorl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 40(%ebp),%edx
- movl 44(%ebp),%ecx
- subl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 48(%ebp),%edx
- movl 52(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 56(%ebp),%edx
- movl 60(%ebp),%ecx
- xorl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 64(%ebp),%edx
- movl 68(%ebp),%ecx
- subl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 72(%ebp),%edx
- movl 76(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 80(%ebp),%edx
- movl 84(%ebp),%ecx
- xorl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 88(%ebp),%edx
- movl 92(%ebp),%ecx
- subl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%esi
-
- popl %edx
- orl %edx,%edx
- jnz .L000cast_enc_done
-
- movl 96(%ebp),%edx
- movl 100(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 104(%ebp),%edx
- movl 108(%ebp),%ecx
- xorl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 112(%ebp),%edx
- movl 116(%ebp),%ecx
- subl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 120(%ebp),%edx
- movl 124(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
-.L000cast_enc_done:
- nop
- movl 20(%esp),%eax
- movl %edi,4(%eax)
- movl %esi,(%eax)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size CAST_encrypt,.-.L_CAST_encrypt_begin
-.globl CAST_decrypt
-.type CAST_decrypt,@function
-.align 16
-CAST_decrypt:
-.L_CAST_decrypt_begin:
-
- pushl %ebp
- pushl %ebx
- movl 12(%esp),%ebx
- movl 16(%esp),%ebp
- pushl %esi
- pushl %edi
-
- movl (%ebx),%edi
- movl 4(%ebx),%esi
-
- movl 128(%ebp),%eax
- orl %eax,%eax
- jnz .L001cast_dec_skip
- xorl %eax,%eax
-
- movl 120(%ebp),%edx
- movl 124(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 112(%ebp),%edx
- movl 116(%ebp),%ecx
- subl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 104(%ebp),%edx
- movl 108(%ebp),%ecx
- xorl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 96(%ebp),%edx
- movl 100(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
-.L001cast_dec_skip:
-
- movl 88(%ebp),%edx
- movl 92(%ebp),%ecx
- subl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 80(%ebp),%edx
- movl 84(%ebp),%ecx
- xorl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 72(%ebp),%edx
- movl 76(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 64(%ebp),%edx
- movl 68(%ebp),%ecx
- subl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 56(%ebp),%edx
- movl 60(%ebp),%ecx
- xorl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 48(%ebp),%edx
- movl 52(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 40(%ebp),%edx
- movl 44(%ebp),%ecx
- subl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 32(%ebp),%edx
- movl 36(%ebp),%ecx
- xorl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 24(%ebp),%edx
- movl 28(%ebp),%ecx
- addl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%edi
-
- movl 16(%ebp),%edx
- movl 20(%ebp),%ecx
- subl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- subl %ebx,%ecx
- xorl %ecx,%esi
-
- movl 8(%ebp),%edx
- movl 12(%ebp),%ecx
- xorl %esi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- addl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- xorl %ebx,%ecx
- xorl %ecx,%edi
-
- movl (%ebp),%edx
- movl 4(%ebp),%ecx
- addl %edi,%edx
- roll %cl,%edx
- movl %edx,%ebx
- xorl %ecx,%ecx
- movb %dh,%cl
- andl $255,%ebx
- shrl $16,%edx
- xorl %eax,%eax
- movb %dh,%al
- andl $255,%edx
- movl CAST_S_table0(,%ecx,4),%ecx
- movl CAST_S_table1(,%ebx,4),%ebx
- xorl %ebx,%ecx
- movl CAST_S_table2(,%eax,4),%ebx
- subl %ebx,%ecx
- movl CAST_S_table3(,%edx,4),%ebx
- addl %ebx,%ecx
- xorl %ecx,%esi
- nop
- movl 20(%esp),%eax
- movl %edi,4(%eax)
- movl %esi,(%eax)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.size CAST_decrypt,.-.L_CAST_decrypt_begin
-.globl CAST_cbc_encrypt
-.type CAST_cbc_encrypt,@function
-.align 16
-CAST_cbc_encrypt:
-.L_CAST_cbc_encrypt_begin:
-
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl 28(%esp),%ebp
-
- movl 36(%esp),%ebx
- movl (%ebx),%esi
- movl 4(%ebx),%edi
- pushl %edi
- pushl %esi
- pushl %edi
- pushl %esi
- movl %esp,%ebx
- movl 36(%esp),%esi
- movl 40(%esp),%edi
-
- movl 56(%esp),%ecx
-
- movl 48(%esp),%eax
- pushl %eax
- pushl %ebx
- cmpl $0,%ecx
- jz .L002decrypt
- andl $4294967288,%ebp
- movl 8(%esp),%eax
- movl 12(%esp),%ebx
- jz .L003encrypt_finish
-.L004encrypt_loop:
- movl (%esi),%ecx
- movl 4(%esi),%edx
- xorl %ecx,%eax
- xorl %edx,%ebx
- bswap %eax
- bswap %ebx
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
- call .L_CAST_encrypt_begin
- movl 8(%esp),%eax
- movl 12(%esp),%ebx
- bswap %eax
- bswap %ebx
- movl %eax,(%edi)
- movl %ebx,4(%edi)
- addl $8,%esi
- addl $8,%edi
- subl $8,%ebp
- jnz .L004encrypt_loop
-.L003encrypt_finish:
- movl 52(%esp),%ebp
- andl $7,%ebp
- jz .L005finish
- call .L006PIC_point
-.L006PIC_point:
- popl %edx
- leal .L007cbc_enc_jmp_table-.L006PIC_point(%edx),%ecx
- movl (%ecx,%ebp,4),%ebp
- addl %edx,%ebp
- xorl %ecx,%ecx
- xorl %edx,%edx
- jmp *%ebp
-.L008ej7:
- movb 6(%esi),%dh
- shll $8,%edx
-.L009ej6:
- movb 5(%esi),%dh
-.L010ej5:
- movb 4(%esi),%dl
-.L011ej4:
- movl (%esi),%ecx
- jmp .L012ejend
-.L013ej3:
- movb 2(%esi),%ch
- shll $8,%ecx
-.L014ej2:
- movb 1(%esi),%ch
-.L015ej1:
- movb (%esi),%cl
-.L012ejend:
- xorl %ecx,%eax
- xorl %edx,%ebx
- bswap %eax
- bswap %ebx
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
- call .L_CAST_encrypt_begin
- movl 8(%esp),%eax
- movl 12(%esp),%ebx
- bswap %eax
- bswap %ebx
- movl %eax,(%edi)
- movl %ebx,4(%edi)
- jmp .L005finish
-.L002decrypt:
- andl $4294967288,%ebp
- movl 16(%esp),%eax
- movl 20(%esp),%ebx
- jz .L016decrypt_finish
-.L017decrypt_loop:
- movl (%esi),%eax
- movl 4(%esi),%ebx
- bswap %eax
- bswap %ebx
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
- call .L_CAST_decrypt_begin
- movl 8(%esp),%eax
- movl 12(%esp),%ebx
- bswap %eax
- bswap %ebx
- movl 16(%esp),%ecx
- movl 20(%esp),%edx
- xorl %eax,%ecx
- xorl %ebx,%edx
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl %ecx,(%edi)
- movl %edx,4(%edi)
- movl %eax,16(%esp)
- movl %ebx,20(%esp)
- addl $8,%esi
- addl $8,%edi
- subl $8,%ebp
- jnz .L017decrypt_loop
-.L016decrypt_finish:
- movl 52(%esp),%ebp
- andl $7,%ebp
- jz .L005finish
- movl (%esi),%eax
- movl 4(%esi),%ebx
- bswap %eax
- bswap %ebx
- movl %eax,8(%esp)
- movl %ebx,12(%esp)
- call .L_CAST_decrypt_begin
- movl 8(%esp),%eax
- movl 12(%esp),%ebx
- bswap %eax
- bswap %ebx
- movl 16(%esp),%ecx
- movl 20(%esp),%edx
- xorl %eax,%ecx
- xorl %ebx,%edx
- movl (%esi),%eax
- movl 4(%esi),%ebx
-.L018dj7:
- rorl $16,%edx
- movb %dl,6(%edi)
- shrl $16,%edx
-.L019dj6:
- movb %dh,5(%edi)
-.L020dj5:
- movb %dl,4(%edi)
-.L021dj4:
- movl %ecx,(%edi)
- jmp .L022djend
-.L023dj3:
- rorl $16,%ecx
- movb %cl,2(%edi)
- shll $16,%ecx
-.L024dj2:
- movb %ch,1(%esi)
-.L025dj1:
- movb %cl,(%esi)
-.L022djend:
- jmp .L005finish
-.L005finish:
- movl 60(%esp),%ecx
- addl $24,%esp
- movl %eax,(%ecx)
- movl %ebx,4(%ecx)
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-.align 64
-.L007cbc_enc_jmp_table:
-.long 0
-.long .L015ej1-.L006PIC_point
-.long .L014ej2-.L006PIC_point
-.long .L013ej3-.L006PIC_point
-.long .L011ej4-.L006PIC_point
-.long .L010ej5-.L006PIC_point
-.long .L009ej6-.L006PIC_point
-.long .L008ej7-.L006PIC_point
-.align 64
-.size CAST_cbc_encrypt,.-.L_CAST_cbc_encrypt_begin
diff --git a/secure/lib/libcrypto/i386/des-586.s b/secure/lib/libcrypto/i386/des-586.s
index 868b2ca..83bf07a 100644
--- a/secure/lib/libcrypto/i386/des-586.s
+++ b/secure/lib/libcrypto/i386/des-586.s
@@ -1001,7 +1001,7 @@ DES_encrypt1:
call .L000pic_point
.L000pic_point:
popl %ebp
- leal DES_SPtrans-.L000pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L001decrypt
@@ -1078,7 +1078,7 @@ DES_encrypt2:
call .L003pic_point
.L003pic_point:
popl %ebp
- leal DES_SPtrans-.L003pic_point(%ebp),%ebp
+ leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp
movl 24(%esp),%ecx
cmpl $0,%ebx
je .L004decrypt
@@ -1708,6 +1708,7 @@ DES_ede3_cbc_encrypt:
.size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin
.align 64
DES_SPtrans:
+.Ldes_sptrans:
.long 34080768,524288,33554434,34080770
.long 33554432,526338,524290,33554434
.long 526338,34080768,34078720,2050
diff --git a/secure/lib/libcrypto/i386/ghash-x86.s b/secure/lib/libcrypto/i386/ghash-x86.s
index a200a97..53d5e3f 100644
--- a/secure/lib/libcrypto/i386/ghash-x86.s
+++ b/secure/lib/libcrypto/i386/ghash-x86.s
@@ -946,27 +946,34 @@ gcm_init_clmul:
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
psllq $5,%xmm0
+ pxor %xmm0,%xmm3
+ psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
- movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $5,%xmm0
+ psrldq $8,%xmm3
pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
psrlq $1,%xmm0
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ psrlq $5,%xmm0
pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ pshufd $78,%xmm2,%xmm3
+ pshufd $78,%xmm0,%xmm4
+ pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
+ pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
+.byte 102,15,58,15,227,8
+ movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
@@ -984,11 +991,10 @@ gcm_gmult_clmul:
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
+ movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
@@ -999,25 +1005,26 @@ gcm_gmult_clmul:
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
psllq $5,%xmm0
+ pxor %xmm0,%xmm3
+ psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
- movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $5,%xmm0
+ psrldq $8,%xmm3
pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
psrlq $1,%xmm0
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ psrlq $5,%xmm0
pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
@@ -1049,127 +1056,115 @@ gcm_ghash_clmul:
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
+ movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
- movdqa %xmm6,%xmm7
pshufd $78,%xmm6,%xmm3
- pshufd $78,%xmm2,%xmm4
+ movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
- pxor %xmm2,%xmm4
+ leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
-.byte 102,15,58,68,220,0
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm7
- pxor %xmm4,%xmm6
+.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
- leal 32(%esi),%esi
+ nop
subl $32,%ebx
jbe .L014even_tail
+ jmp .L015mod_loop
+.align 32
.L015mod_loop:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
+ nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- movdqu (%esi),%xmm3
+.byte 102,15,58,68,229,16
movups (%edx),%xmm2
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
+ xorps %xmm6,%xmm0
+ movdqa (%ecx),%xmm5
+ xorps %xmm7,%xmm1
+ movdqu (%esi),%xmm7
+ pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
-.byte 102,15,56,0,221
+ pxor %xmm1,%xmm3
+.byte 102,15,56,0,253
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm3,%xmm0
.byte 102,15,56,0,245
- movdqa %xmm6,%xmm5
+ pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
- pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
psllq $5,%xmm0
+ pxor %xmm0,%xmm3
+ psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
+ movups 32(%edx),%xmm5
psllq $57,%xmm0
- movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pshufd $78,%xmm5,%xmm3
+ psrldq $8,%xmm3
+ pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ pshufd $78,%xmm7,%xmm3
+ movdqa %xmm0,%xmm4
+ psrlq $1,%xmm0
+ pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
- pxor %xmm5,%xmm3
- pshufd $78,%xmm2,%xmm5
- pxor %xmm2,%xmm5
.byte 102,15,58,68,250,17
- movdqa %xmm0,%xmm4
+ movups 16(%edx),%xmm2
+ pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
- psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
- movups 16(%edx),%xmm2
- xorps %xmm6,%xmm3
- xorps %xmm7,%xmm3
- movdqa %xmm3,%xmm5
- psrldq $8,%xmm3
- pslldq $8,%xmm5
- pxor %xmm3,%xmm7
- pxor %xmm5,%xmm6
- movdqa (%ecx),%xmm5
leal 32(%esi),%esi
subl $32,%ebx
ja .L015mod_loop
.L014even_tail:
+ pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
- pshufd $78,%xmm0,%xmm3
- pshufd $78,%xmm2,%xmm4
- pxor %xmm0,%xmm3
- pxor %xmm2,%xmm4
+ pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
-.byte 102,15,58,68,220,0
- xorps %xmm0,%xmm3
- xorps %xmm1,%xmm3
- movdqa %xmm3,%xmm4
- psrldq $8,%xmm3
- pslldq $8,%xmm4
- pxor %xmm3,%xmm1
- pxor %xmm4,%xmm0
- pxor %xmm6,%xmm0
- pxor %xmm7,%xmm1
- movdqa %xmm0,%xmm3
- psllq $1,%xmm0
+.byte 102,15,58,68,229,16
+ movdqa (%ecx),%xmm5
+ xorps %xmm6,%xmm0
+ xorps %xmm7,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pxor %xmm3,%xmm4
+ movdqa %xmm4,%xmm3
+ psrldq $8,%xmm4
+ pslldq $8,%xmm3
+ pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
+ movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
psllq $5,%xmm0
+ pxor %xmm0,%xmm3
+ psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
- movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $5,%xmm0
+ psrldq $8,%xmm3
pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
psrlq $1,%xmm0
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ psrlq $5,%xmm0
pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L016done
movups (%edx),%xmm2
@@ -1192,25 +1187,26 @@ gcm_ghash_clmul:
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
+ movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
- psllq $1,%xmm0
- pxor %xmm3,%xmm0
psllq $5,%xmm0
+ pxor %xmm0,%xmm3
+ psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
- movdqa %xmm0,%xmm4
+ movdqa %xmm0,%xmm3
pslldq $8,%xmm0
- psrldq $8,%xmm4
- pxor %xmm3,%xmm0
- pxor %xmm4,%xmm1
- movdqa %xmm0,%xmm4
- psrlq $5,%xmm0
+ psrldq $8,%xmm3
pxor %xmm4,%xmm0
+ pxor %xmm3,%xmm1
+ movdqa %xmm0,%xmm4
psrlq $1,%xmm0
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ psrlq $5,%xmm0
pxor %xmm4,%xmm0
- pxor %xmm1,%xmm4
psrlq $1,%xmm0
- pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
.L016done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
@@ -1225,12 +1221,6 @@ gcm_ghash_clmul:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 64
-.Lrem_4bit:
-.long 0,0,0,471859200,0,943718400,0,610271232
-.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
-.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
-.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.align 64
.Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
@@ -1264,6 +1254,12 @@ gcm_ghash_clmul:
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
+.align 64
+.Lrem_4bit:
+.long 0,0,0,471859200,0,943718400,0,610271232
+.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
+.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
+.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
diff --git a/secure/lib/libcrypto/i386/rc4-586.s b/secure/lib/libcrypto/i386/rc4-586.s
index c1f70a6..e9603ae 100644
--- a/secure/lib/libcrypto/i386/rc4-586.s
+++ b/secure/lib/libcrypto/i386/rc4-586.s
@@ -30,8 +30,8 @@ RC4:
movl (%edi,%eax,4),%ecx
andl $-4,%edx
jz .L002loop1
- testl $-8,%edx
movl %ebp,32(%esp)
+ testl $-8,%edx
jz .L003go4loop4
leal OPENSSL_ia32cap_P,%ebp
btl $26,(%ebp)
@@ -370,4 +370,4 @@ RC4_options:
.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.size RC4_options,.-.L_RC4_options_begin
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/sha1-586.s b/secure/lib/libcrypto/i386/sha1-586.s
index ffb8883..74b1ab4 100644
--- a/secure/lib/libcrypto/i386/sha1-586.s
+++ b/secure/lib/libcrypto/i386/sha1-586.s
@@ -19,8 +19,11 @@ sha1_block_data_order:
movl 4(%esi),%edx
testl $512,%edx
jz .L001x86
+ movl 8(%esi),%ecx
testl $16777216,%eax
jz .L001x86
+ testl $536870912,%ecx
+ jnz .Lshaext_shortcut
jmp .Lssse3_shortcut
.align 16
.L001x86:
@@ -1389,9 +1392,9 @@ sha1_block_data_order:
popl %ebp
ret
.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
-.type _sha1_block_data_order_ssse3,@function
+.type _sha1_block_data_order_shaext,@function
.align 16
-_sha1_block_data_order_ssse3:
+_sha1_block_data_order_shaext:
pushl %ebp
pushl %ebx
pushl %esi
@@ -1400,6 +1403,176 @@ _sha1_block_data_order_ssse3:
.L003pic_point:
popl %ebp
leal .LK_XX_XX-.L003pic_point(%ebp),%ebp
+.Lshaext_shortcut:
+ movl 20(%esp),%edi
+ movl %esp,%ebx
+ movl 24(%esp),%esi
+ movl 28(%esp),%ecx
+ subl $32,%esp
+ movdqu (%edi),%xmm0
+ movd 16(%edi),%xmm1
+ andl $-32,%esp
+ movdqa 80(%ebp),%xmm3
+ movdqu (%esi),%xmm4
+ pshufd $27,%xmm0,%xmm0
+ movdqu 16(%esi),%xmm5
+ pshufd $27,%xmm1,%xmm1
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,227
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,235
+.byte 102,15,56,0,243
+.byte 102,15,56,0,251
+ jmp .L004loop_shaext
+.align 16
+.L004loop_shaext:
+ decl %ecx
+ leal 64(%esi),%eax
+ movdqa %xmm1,(%esp)
+ paddd %xmm4,%xmm1
+ cmovnel %eax,%esi
+ movdqa %xmm0,16(%esp)
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,0
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,0
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,1
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,1
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+.byte 15,56,201,229
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,213
+ pxor %xmm6,%xmm4
+.byte 15,56,201,238
+.byte 15,56,202,231
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,2
+.byte 15,56,200,206
+ pxor %xmm7,%xmm5
+.byte 15,56,202,236
+.byte 15,56,201,247
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,2
+.byte 15,56,200,215
+ pxor %xmm4,%xmm6
+.byte 15,56,201,252
+.byte 15,56,202,245
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,204
+ pxor %xmm5,%xmm7
+.byte 15,56,202,254
+ movdqu (%esi),%xmm4
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,213
+ movdqu 16(%esi),%xmm5
+.byte 102,15,56,0,227
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+.byte 15,56,200,206
+ movdqu 32(%esi),%xmm6
+.byte 102,15,56,0,235
+ movdqa %xmm0,%xmm2
+.byte 15,58,204,193,3
+.byte 15,56,200,215
+ movdqu 48(%esi),%xmm7
+.byte 102,15,56,0,243
+ movdqa %xmm0,%xmm1
+.byte 15,58,204,194,3
+ movdqa (%esp),%xmm2
+.byte 102,15,56,0,251
+.byte 15,56,200,202
+ paddd 16(%esp),%xmm0
+ jnz .L004loop_shaext
+ pshufd $27,%xmm0,%xmm0
+ pshufd $27,%xmm1,%xmm1
+ movdqu %xmm0,(%edi)
+ movd %xmm1,16(%edi)
+ movl %ebx,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext
+.type _sha1_block_data_order_ssse3,@function
+.align 16
+_sha1_block_data_order_ssse3:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ call .L005pic_point
+.L005pic_point:
+ popl %ebp
+ leal .LK_XX_XX-.L005pic_point(%ebp),%ebp
.Lssse3_shortcut:
movdqa (%ebp),%xmm7
movdqa 16(%ebp),%xmm0
@@ -1447,936 +1620,917 @@ _sha1_block_data_order_ssse3:
movdqa %xmm1,16(%esp)
psubd %xmm7,%xmm1
movdqa %xmm2,32(%esp)
+ movl %ecx,%ebp
psubd %xmm7,%xmm2
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ xorl %edx,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebp,%esi
+ jmp .L006loop
.align 16
-.L004loop:
- addl (%esp),%edi
- xorl %edx,%ecx
-.byte 102,15,58,15,224,8
- movdqa %xmm3,%xmm6
+.L006loop:
+ rorl $2,%ebx
+ xorl %edx,%esi
movl %eax,%ebp
- roll $5,%eax
+ punpcklqdq %xmm1,%xmm4
+ movdqa %xmm3,%xmm6
+ addl (%esp),%edi
+ xorl %ecx,%ebx
paddd %xmm3,%xmm7
movdqa %xmm0,64(%esp)
- andl %ecx,%esi
- xorl %edx,%ecx
+ roll $5,%eax
+ addl %esi,%edi
psrldq $4,%xmm6
- xorl %edx,%esi
- addl %eax,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
pxor %xmm0,%xmm4
- rorl $2,%ebx
- addl %esi,%edi
+ addl %eax,%edi
+ rorl $7,%eax
pxor %xmm2,%xmm6
- addl 4(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%ebp
movl %edi,%esi
- roll $5,%edi
+ addl 4(%esp),%edx
pxor %xmm6,%xmm4
- andl %ebx,%ebp
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ roll $5,%edi
movdqa %xmm7,48(%esp)
- xorl %ecx,%ebp
- addl %edi,%edx
- movdqa %xmm4,%xmm0
- movdqa %xmm4,%xmm6
- rorl $7,%eax
addl %ebp,%edx
- addl 8(%esp),%ecx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm0
xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ movdqa %xmm4,%xmm6
+ xorl %ebx,%esi
pslldq $12,%xmm0
paddd %xmm4,%xmm4
movl %edx,%ebp
- roll $5,%edx
- andl %eax,%esi
- xorl %ebx,%eax
+ addl 8(%esp),%ecx
psrld $31,%xmm6
- xorl %ebx,%esi
- addl %edx,%ecx
+ xorl %eax,%edi
+ roll $5,%edx
movdqa %xmm0,%xmm7
- rorl $7,%edi
addl %esi,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
psrld $30,%xmm0
+ addl %edx,%ecx
+ rorl $7,%edx
por %xmm6,%xmm4
- addl 12(%esp),%ebx
- xorl %eax,%edi
+ xorl %eax,%ebp
movl %ecx,%esi
- roll $5,%ecx
+ addl 12(%esp),%ebx
pslld $2,%xmm7
+ xorl %edi,%edx
+ roll $5,%ecx
pxor %xmm0,%xmm4
- andl %edi,%ebp
- xorl %eax,%edi
movdqa 96(%esp),%xmm0
- xorl %eax,%ebp
- addl %ecx,%ebx
- pxor %xmm7,%xmm4
- movdqa %xmm2,%xmm5
- rorl $7,%edx
addl %ebp,%ebx
- addl 16(%esp),%eax
+ andl %edx,%esi
+ pxor %xmm7,%xmm4
+ pshufd $238,%xmm1,%xmm5
xorl %edi,%edx
-.byte 102,15,58,15,233,8
- movdqa %xmm4,%xmm7
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
movl %ebx,%ebp
- roll $5,%ebx
+ punpcklqdq %xmm2,%xmm5
+ movdqa %xmm4,%xmm7
+ addl 16(%esp),%eax
+ xorl %edx,%ecx
paddd %xmm4,%xmm0
movdqa %xmm1,80(%esp)
- andl %edx,%esi
- xorl %edi,%edx
+ roll $5,%ebx
+ addl %esi,%eax
psrldq $4,%xmm7
- xorl %edi,%esi
- addl %ebx,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
pxor %xmm1,%xmm5
- rorl $7,%ecx
- addl %esi,%eax
+ addl %ebx,%eax
+ rorl $7,%ebx
pxor %xmm3,%xmm7
- addl 20(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%ebp
movl %eax,%esi
- roll $5,%eax
+ addl 20(%esp),%edi
pxor %xmm7,%xmm5
- andl %ecx,%ebp
- xorl %edx,%ecx
+ xorl %ecx,%ebx
+ roll $5,%eax
movdqa %xmm0,(%esp)
- xorl %edx,%ebp
- addl %eax,%edi
- movdqa %xmm5,%xmm1
- movdqa %xmm5,%xmm7
- rorl $7,%ebx
addl %ebp,%edi
- addl 24(%esp),%edx
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm1
xorl %ecx,%ebx
+ addl %eax,%edi
+ rorl $7,%eax
+ movdqa %xmm5,%xmm7
+ xorl %ecx,%esi
pslldq $12,%xmm1
paddd %xmm5,%xmm5
movl %edi,%ebp
- roll $5,%edi
- andl %ebx,%esi
- xorl %ecx,%ebx
+ addl 24(%esp),%edx
psrld $31,%xmm7
- xorl %ecx,%esi
- addl %edi,%edx
+ xorl %ebx,%eax
+ roll $5,%edi
movdqa %xmm1,%xmm0
- rorl $7,%eax
addl %esi,%edx
+ andl %eax,%ebp
+ xorl %ebx,%eax
psrld $30,%xmm1
+ addl %edi,%edx
+ rorl $7,%edi
por %xmm7,%xmm5
- addl 28(%esp),%ecx
- xorl %ebx,%eax
+ xorl %ebx,%ebp
movl %edx,%esi
- roll $5,%edx
+ addl 28(%esp),%ecx
pslld $2,%xmm0
+ xorl %eax,%edi
+ roll $5,%edx
pxor %xmm1,%xmm5
- andl %eax,%ebp
- xorl %ebx,%eax
movdqa 112(%esp),%xmm1
- xorl %ebx,%ebp
- addl %edx,%ecx
- pxor %xmm0,%xmm5
- movdqa %xmm3,%xmm6
- rorl $7,%edi
addl %ebp,%ecx
- addl 32(%esp),%ebx
+ andl %edi,%esi
+ pxor %xmm0,%xmm5
+ pshufd $238,%xmm2,%xmm6
xorl %eax,%edi
-.byte 102,15,58,15,242,8
- movdqa %xmm5,%xmm0
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
- roll $5,%ecx
+ punpcklqdq %xmm3,%xmm6
+ movdqa %xmm5,%xmm0
+ addl 32(%esp),%ebx
+ xorl %edi,%edx
paddd %xmm5,%xmm1
movdqa %xmm2,96(%esp)
- andl %edi,%esi
- xorl %eax,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
psrldq $4,%xmm0
- xorl %eax,%esi
- addl %ecx,%ebx
+ andl %edx,%ebp
+ xorl %edi,%edx
pxor %xmm2,%xmm6
- rorl $7,%edx
- addl %esi,%ebx
+ addl %ecx,%ebx
+ rorl $7,%ecx
pxor %xmm4,%xmm0
- addl 36(%esp),%eax
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
- roll $5,%ebx
+ addl 36(%esp),%eax
pxor %xmm0,%xmm6
- andl %edx,%ebp
- xorl %edi,%edx
+ xorl %edx,%ecx
+ roll $5,%ebx
movdqa %xmm1,16(%esp)
- xorl %edi,%ebp
- addl %ebx,%eax
- movdqa %xmm6,%xmm2
- movdqa %xmm6,%xmm0
- rorl $7,%ecx
addl %ebp,%eax
- addl 40(%esp),%edi
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm2
xorl %edx,%ecx
+ addl %ebx,%eax
+ rorl $7,%ebx
+ movdqa %xmm6,%xmm0
+ xorl %edx,%esi
pslldq $12,%xmm2
paddd %xmm6,%xmm6
movl %eax,%ebp
- roll $5,%eax
- andl %ecx,%esi
- xorl %edx,%ecx
+ addl 40(%esp),%edi
psrld $31,%xmm0
- xorl %edx,%esi
- addl %eax,%edi
+ xorl %ecx,%ebx
+ roll $5,%eax
movdqa %xmm2,%xmm1
- rorl $7,%ebx
addl %esi,%edi
+ andl %ebx,%ebp
+ xorl %ecx,%ebx
psrld $30,%xmm2
+ addl %eax,%edi
+ rorl $7,%eax
por %xmm0,%xmm6
- addl 44(%esp),%edx
- xorl %ecx,%ebx
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm0
movl %edi,%esi
- roll $5,%edi
+ addl 44(%esp),%edx
pslld $2,%xmm1
+ xorl %ebx,%eax
+ roll $5,%edi
pxor %xmm2,%xmm6
- andl %ebx,%ebp
- xorl %ecx,%ebx
movdqa 112(%esp),%xmm2
- xorl %ecx,%ebp
- addl %edi,%edx
- pxor %xmm1,%xmm6
- movdqa %xmm4,%xmm7
- rorl $7,%eax
addl %ebp,%edx
- addl 48(%esp),%ecx
+ andl %eax,%esi
+ pxor %xmm1,%xmm6
+ pshufd $238,%xmm3,%xmm7
xorl %ebx,%eax
-.byte 102,15,58,15,251,8
- movdqa %xmm6,%xmm1
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%esi
movl %edx,%ebp
- roll $5,%edx
+ punpcklqdq %xmm4,%xmm7
+ movdqa %xmm6,%xmm1
+ addl 48(%esp),%ecx
+ xorl %eax,%edi
paddd %xmm6,%xmm2
movdqa %xmm3,64(%esp)
- andl %eax,%esi
- xorl %ebx,%eax
+ roll $5,%edx
+ addl %esi,%ecx
psrldq $4,%xmm1
- xorl %ebx,%esi
- addl %edx,%ecx
+ andl %edi,%ebp
+ xorl %eax,%edi
pxor %xmm3,%xmm7
- rorl $7,%edi
- addl %esi,%ecx
+ addl %edx,%ecx
+ rorl $7,%edx
pxor %xmm5,%xmm1
- addl 52(%esp),%ebx
- xorl %eax,%edi
+ xorl %eax,%ebp
movl %ecx,%esi
- roll $5,%ecx
+ addl 52(%esp),%ebx
pxor %xmm1,%xmm7
- andl %edi,%ebp
- xorl %eax,%edi
+ xorl %edi,%edx
+ roll $5,%ecx
movdqa %xmm2,32(%esp)
- xorl %eax,%ebp
- addl %ecx,%ebx
- movdqa %xmm7,%xmm3
- movdqa %xmm7,%xmm1
- rorl $7,%edx
addl %ebp,%ebx
- addl 56(%esp),%eax
+ andl %edx,%esi
+ movdqa %xmm7,%xmm3
xorl %edi,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ movdqa %xmm7,%xmm1
+ xorl %edi,%esi
pslldq $12,%xmm3
paddd %xmm7,%xmm7
movl %ebx,%ebp
- roll $5,%ebx
- andl %edx,%esi
- xorl %edi,%edx
+ addl 56(%esp),%eax
psrld $31,%xmm1
- xorl %edi,%esi
- addl %ebx,%eax
+ xorl %edx,%ecx
+ roll $5,%ebx
movdqa %xmm3,%xmm2
- rorl $7,%ecx
addl %esi,%eax
+ andl %ecx,%ebp
+ xorl %edx,%ecx
psrld $30,%xmm3
+ addl %ebx,%eax
+ rorl $7,%ebx
por %xmm1,%xmm7
- addl 60(%esp),%edi
- xorl %edx,%ecx
+ xorl %edx,%ebp
movdqa 80(%esp),%xmm1
movl %eax,%esi
- roll $5,%eax
+ addl 60(%esp),%edi
pslld $2,%xmm2
+ xorl %ecx,%ebx
+ roll $5,%eax
pxor %xmm3,%xmm7
- andl %ecx,%ebp
- xorl %edx,%ecx
movdqa 112(%esp),%xmm3
- xorl %edx,%ebp
- addl %eax,%edi
- pxor %xmm2,%xmm7
- rorl $7,%ebx
addl %ebp,%edi
- movdqa %xmm7,%xmm2
- addl (%esp),%edx
- pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
+ andl %ebx,%esi
+ pxor %xmm2,%xmm7
+ pshufd $238,%xmm6,%xmm2
xorl %ecx,%ebx
+ addl %eax,%edi
+ rorl $7,%eax
+ pxor %xmm4,%xmm0
+ punpcklqdq %xmm7,%xmm2
+ xorl %ecx,%esi
movl %edi,%ebp
- roll $5,%edi
+ addl (%esp),%edx
pxor %xmm1,%xmm0
movdqa %xmm4,80(%esp)
- andl %ebx,%esi
- xorl %ecx,%ebx
+ xorl %ebx,%eax
+ roll $5,%edi
movdqa %xmm3,%xmm4
+ addl %esi,%edx
paddd %xmm7,%xmm3
- xorl %ecx,%esi
- addl %edi,%edx
+ andl %eax,%ebp
pxor %xmm2,%xmm0
- rorl $7,%eax
- addl %esi,%edx
- addl 4(%esp),%ecx
xorl %ebx,%eax
+ addl %edi,%edx
+ rorl $7,%edi
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
movl %edx,%esi
+ addl 4(%esp),%ecx
+ xorl %eax,%edi
roll $5,%edx
- andl %eax,%ebp
- xorl %ebx,%eax
pslld $2,%xmm0
- xorl %ebx,%ebp
- addl %edx,%ecx
- psrld $30,%xmm2
- rorl $7,%edi
addl %ebp,%ecx
- addl 8(%esp),%ebx
+ andl %edi,%esi
+ psrld $30,%xmm2
xorl %eax,%edi
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
movl %ecx,%ebp
+ addl 8(%esp),%ebx
+ xorl %edi,%edx
roll $5,%ecx
por %xmm2,%xmm0
- andl %edi,%esi
- xorl %eax,%edi
+ addl %esi,%ebx
+ andl %edx,%ebp
movdqa 96(%esp),%xmm2
- xorl %eax,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- rorl $7,%edx
- addl %esi,%ebx
addl 12(%esp),%eax
- movdqa %xmm0,%xmm3
- xorl %edi,%edx
+ xorl %edi,%ebp
movl %ebx,%esi
+ pshufd $238,%xmm7,%xmm3
roll $5,%ebx
- andl %edx,%ebp
- xorl %edi,%edx
- xorl %edi,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
addl 16(%esp),%edi
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
- xorl %edx,%esi
+ punpcklqdq %xmm0,%xmm3
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm2,%xmm1
movdqa %xmm5,96(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm4,%xmm5
- paddd %xmm0,%xmm4
rorl $7,%ebx
- addl %esi,%edi
+ paddd %xmm0,%xmm4
+ addl %eax,%edi
pxor %xmm3,%xmm1
addl 20(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
pslld $2,%xmm1
addl 24(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm3
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
por %xmm3,%xmm1
addl 28(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movdqa 64(%esp),%xmm3
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
- movdqa %xmm1,%xmm4
addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ pshufd $238,%xmm0,%xmm4
+ addl %ecx,%ebx
addl 32(%esp),%eax
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
- xorl %edi,%esi
+ punpcklqdq %xmm1,%xmm4
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
pxor %xmm3,%xmm2
movdqa %xmm6,64(%esp)
- xorl %edx,%esi
- addl %ebx,%eax
+ addl %esi,%eax
+ xorl %edx,%ebp
movdqa 128(%esp),%xmm6
- paddd %xmm1,%xmm5
rorl $7,%ecx
- addl %esi,%eax
+ paddd %xmm1,%xmm5
+ addl %ebx,%eax
pxor %xmm4,%xmm2
addl 36(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
pslld $2,%xmm2
addl 40(%esp),%edx
- xorl %ecx,%esi
+ xorl %ebx,%esi
psrld $30,%xmm4
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
addl %esi,%edx
+ xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
por %xmm4,%xmm2
addl 44(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movdqa 80(%esp),%xmm4
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
- movdqa %xmm2,%xmm5
addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ pshufd $238,%xmm1,%xmm5
+ addl %edx,%ecx
addl 48(%esp),%ebx
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %eax,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
pxor %xmm4,%xmm3
movdqa %xmm7,80(%esp)
- xorl %edi,%esi
- addl %ecx,%ebx
+ addl %esi,%ebx
+ xorl %edi,%ebp
movdqa %xmm6,%xmm7
- paddd %xmm2,%xmm6
rorl $7,%edx
- addl %esi,%ebx
+ paddd %xmm2,%xmm6
+ addl %ecx,%ebx
pxor %xmm5,%xmm3
addl 52(%esp),%eax
- xorl %edi,%ebp
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
pslld $2,%xmm3
addl 56(%esp),%edi
- xorl %edx,%esi
+ xorl %ecx,%esi
psrld $30,%xmm5
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
addl %esi,%edi
+ xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
por %xmm5,%xmm3
addl 60(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movdqa 96(%esp),%xmm5
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
- movdqa %xmm3,%xmm6
addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ pshufd $238,%xmm2,%xmm6
+ addl %edi,%edx
addl (%esp),%ecx
pxor %xmm0,%xmm4
-.byte 102,15,58,15,242,8
- xorl %ebx,%esi
+ punpcklqdq %xmm3,%xmm6
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
pxor %xmm5,%xmm4
movdqa %xmm0,96(%esp)
- xorl %eax,%esi
- addl %edx,%ecx
+ addl %esi,%ecx
+ xorl %eax,%ebp
movdqa %xmm7,%xmm0
- paddd %xmm3,%xmm7
rorl $7,%edi
- addl %esi,%ecx
+ paddd %xmm3,%xmm7
+ addl %edx,%ecx
pxor %xmm6,%xmm4
addl 4(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
movdqa %xmm4,%xmm6
movdqa %xmm7,48(%esp)
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
addl %ebp,%ebx
+ xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
pslld $2,%xmm4
addl 8(%esp),%eax
- xorl %edi,%esi
+ xorl %edx,%esi
psrld $30,%xmm6
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
addl %esi,%eax
+ xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
por %xmm6,%xmm4
addl 12(%esp),%edi
- xorl %edx,%ebp
+ xorl %ecx,%ebp
movdqa 64(%esp),%xmm6
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
- movdqa %xmm4,%xmm7
addl %ebp,%edi
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ pshufd $238,%xmm3,%xmm7
+ addl %eax,%edi
addl 16(%esp),%edx
pxor %xmm1,%xmm5
-.byte 102,15,58,15,251,8
- xorl %ecx,%esi
+ punpcklqdq %xmm4,%xmm7
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
pxor %xmm6,%xmm5
movdqa %xmm1,64(%esp)
- xorl %ebx,%esi
- addl %edi,%edx
+ addl %esi,%edx
+ xorl %ebx,%ebp
movdqa %xmm0,%xmm1
- paddd %xmm4,%xmm0
rorl $7,%eax
- addl %esi,%edx
+ paddd %xmm4,%xmm0
+ addl %edi,%edx
pxor %xmm7,%xmm5
addl 20(%esp),%ecx
- xorl %ebx,%ebp
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
movdqa %xmm5,%xmm7
movdqa %xmm0,(%esp)
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
pslld $2,%xmm5
addl 24(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
psrld $30,%xmm7
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
addl %esi,%ebx
+ xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
por %xmm7,%xmm5
addl 28(%esp),%eax
- xorl %edi,%ebp
movdqa 80(%esp),%xmm7
+ rorl $7,%ecx
movl %ebx,%esi
- roll $5,%ebx
xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
- movdqa %xmm5,%xmm0
+ roll $5,%ebx
+ pshufd $238,%xmm4,%xmm0
addl %ebp,%eax
- movl %ecx,%ebp
- pxor %xmm2,%xmm6
-.byte 102,15,58,15,196,8
+ xorl %ecx,%esi
xorl %edx,%ecx
+ addl %ebx,%eax
addl 32(%esp),%edi
- andl %edx,%ebp
- pxor %xmm7,%xmm6
- movdqa %xmm2,80(%esp)
+ pxor %xmm2,%xmm6
+ punpcklqdq %xmm5,%xmm0
andl %ecx,%esi
+ xorl %edx,%ecx
rorl $7,%ebx
- movdqa %xmm1,%xmm2
- paddd %xmm5,%xmm1
- addl %ebp,%edi
+ pxor %xmm7,%xmm6
+ movdqa %xmm2,80(%esp)
movl %eax,%ebp
- pxor %xmm0,%xmm6
+ xorl %ecx,%esi
roll $5,%eax
+ movdqa %xmm1,%xmm2
addl %esi,%edi
- xorl %edx,%ecx
+ paddd %xmm5,%xmm1
+ xorl %ebx,%ebp
+ pxor %xmm0,%xmm6
+ xorl %ecx,%ebx
addl %eax,%edi
+ addl 36(%esp),%edx
+ andl %ebx,%ebp
movdqa %xmm6,%xmm0
movdqa %xmm1,16(%esp)
- movl %ebx,%esi
xorl %ecx,%ebx
- addl 36(%esp),%edx
- andl %ecx,%esi
- pslld $2,%xmm6
- andl %ebx,%ebp
rorl $7,%eax
- psrld $30,%xmm0
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
+ pslld $2,%xmm6
addl %ebp,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- por %xmm0,%xmm6
- movl %eax,%ebp
+ xorl %eax,%esi
+ psrld $30,%xmm0
xorl %ebx,%eax
- movdqa 96(%esp),%xmm0
+ addl %edi,%edx
addl 40(%esp),%ecx
- andl %ebx,%ebp
andl %eax,%esi
+ xorl %ebx,%eax
rorl $7,%edi
- addl %ebp,%ecx
- movdqa %xmm6,%xmm1
+ por %xmm0,%xmm6
movl %edx,%ebp
+ xorl %eax,%esi
+ movdqa 96(%esp),%xmm0
roll $5,%edx
addl %esi,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- movl %edi,%esi
+ xorl %edi,%ebp
xorl %eax,%edi
+ addl %edx,%ecx
+ pshufd $238,%xmm5,%xmm1
addl 44(%esp),%ebx
- andl %eax,%esi
andl %edi,%ebp
+ xorl %eax,%edi
rorl $7,%edx
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
addl %ebp,%ebx
- xorl %eax,%edi
+ xorl %edx,%esi
+ xorl %edi,%edx
addl %ecx,%ebx
- movl %edx,%ebp
+ addl 48(%esp),%eax
pxor %xmm3,%xmm7
-.byte 102,15,58,15,205,8
+ punpcklqdq %xmm6,%xmm1
+ andl %edx,%esi
xorl %edi,%edx
- addl 48(%esp),%eax
- andl %edi,%ebp
+ rorl $7,%ecx
pxor %xmm0,%xmm7
movdqa %xmm3,96(%esp)
- andl %edx,%esi
- rorl $7,%ecx
- movdqa 144(%esp),%xmm3
- paddd %xmm6,%xmm2
- addl %ebp,%eax
movl %ebx,%ebp
- pxor %xmm1,%xmm7
+ xorl %edx,%esi
roll $5,%ebx
+ movdqa 144(%esp),%xmm3
addl %esi,%eax
- xorl %edi,%edx
+ paddd %xmm6,%xmm2
+ xorl %ecx,%ebp
+ pxor %xmm1,%xmm7
+ xorl %edx,%ecx
addl %ebx,%eax
+ addl 52(%esp),%edi
+ andl %ecx,%ebp
movdqa %xmm7,%xmm1
movdqa %xmm2,32(%esp)
- movl %ecx,%esi
xorl %edx,%ecx
- addl 52(%esp),%edi
- andl %edx,%esi
- pslld $2,%xmm7
- andl %ecx,%ebp
rorl $7,%ebx
- psrld $30,%xmm1
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
+ pslld $2,%xmm7
addl %ebp,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- por %xmm1,%xmm7
- movl %ebx,%ebp
+ xorl %ebx,%esi
+ psrld $30,%xmm1
xorl %ecx,%ebx
- movdqa 64(%esp),%xmm1
+ addl %eax,%edi
addl 56(%esp),%edx
- andl %ecx,%ebp
andl %ebx,%esi
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %ebp,%edx
- movdqa %xmm7,%xmm2
+ por %xmm1,%xmm7
movl %edi,%ebp
+ xorl %ebx,%esi
+ movdqa 64(%esp),%xmm1
roll $5,%edi
addl %esi,%edx
- xorl %ecx,%ebx
- addl %edi,%edx
- movl %eax,%esi
+ xorl %eax,%ebp
xorl %ebx,%eax
+ addl %edi,%edx
+ pshufd $238,%xmm6,%xmm2
addl 60(%esp),%ecx
- andl %ebx,%esi
andl %eax,%ebp
+ xorl %ebx,%eax
rorl $7,%edi
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
addl %ebp,%ecx
- xorl %ebx,%eax
+ xorl %edi,%esi
+ xorl %eax,%edi
addl %edx,%ecx
- movl %edi,%ebp
+ addl (%esp),%ebx
pxor %xmm4,%xmm0
-.byte 102,15,58,15,214,8
+ punpcklqdq %xmm7,%xmm2
+ andl %edi,%esi
xorl %eax,%edi
- addl (%esp),%ebx
- andl %eax,%ebp
+ rorl $7,%edx
pxor %xmm1,%xmm0
movdqa %xmm4,64(%esp)
- andl %edi,%esi
- rorl $7,%edx
- movdqa %xmm3,%xmm4
- paddd %xmm7,%xmm3
- addl %ebp,%ebx
movl %ecx,%ebp
- pxor %xmm2,%xmm0
+ xorl %edi,%esi
roll $5,%ecx
+ movdqa %xmm3,%xmm4
addl %esi,%ebx
- xorl %eax,%edi
+ paddd %xmm7,%xmm3
+ xorl %edx,%ebp
+ pxor %xmm2,%xmm0
+ xorl %edi,%edx
addl %ecx,%ebx
+ addl 4(%esp),%eax
+ andl %edx,%ebp
movdqa %xmm0,%xmm2
movdqa %xmm3,48(%esp)
- movl %edx,%esi
xorl %edi,%edx
- addl 4(%esp),%eax
- andl %edi,%esi
- pslld $2,%xmm0
- andl %edx,%ebp
rorl $7,%ecx
- psrld $30,%xmm2
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
+ pslld $2,%xmm0
addl %ebp,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- por %xmm2,%xmm0
- movl %ecx,%ebp
+ xorl %ecx,%esi
+ psrld $30,%xmm2
xorl %edx,%ecx
- movdqa 80(%esp),%xmm2
+ addl %ebx,%eax
addl 8(%esp),%edi
- andl %edx,%ebp
andl %ecx,%esi
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %ebp,%edi
- movdqa %xmm0,%xmm3
+ por %xmm2,%xmm0
movl %eax,%ebp
+ xorl %ecx,%esi
+ movdqa 80(%esp),%xmm2
roll $5,%eax
addl %esi,%edi
- xorl %edx,%ecx
- addl %eax,%edi
- movl %ebx,%esi
+ xorl %ebx,%ebp
xorl %ecx,%ebx
+ addl %eax,%edi
+ pshufd $238,%xmm7,%xmm3
addl 12(%esp),%edx
- andl %ecx,%esi
andl %ebx,%ebp
+ xorl %ecx,%ebx
rorl $7,%eax
- addl %esi,%edx
movl %edi,%esi
+ xorl %ebx,%ebp
roll $5,%edi
addl %ebp,%edx
- xorl %ecx,%ebx
+ xorl %eax,%esi
+ xorl %ebx,%eax
addl %edi,%edx
- movl %eax,%ebp
+ addl 16(%esp),%ecx
pxor %xmm5,%xmm1
-.byte 102,15,58,15,223,8
+ punpcklqdq %xmm0,%xmm3
+ andl %eax,%esi
xorl %ebx,%eax
- addl 16(%esp),%ecx
- andl %ebx,%ebp
+ rorl $7,%edi
pxor %xmm2,%xmm1
movdqa %xmm5,80(%esp)
- andl %eax,%esi
- rorl $7,%edi
- movdqa %xmm4,%xmm5
- paddd %xmm0,%xmm4
- addl %ebp,%ecx
movl %edx,%ebp
- pxor %xmm3,%xmm1
+ xorl %eax,%esi
roll $5,%edx
+ movdqa %xmm4,%xmm5
addl %esi,%ecx
- xorl %ebx,%eax
+ paddd %xmm0,%xmm4
+ xorl %edi,%ebp
+ pxor %xmm3,%xmm1
+ xorl %eax,%edi
addl %edx,%ecx
+ addl 20(%esp),%ebx
+ andl %edi,%ebp
movdqa %xmm1,%xmm3
movdqa %xmm4,(%esp)
- movl %edi,%esi
xorl %eax,%edi
- addl 20(%esp),%ebx
- andl %eax,%esi
- pslld $2,%xmm1
- andl %edi,%ebp
rorl $7,%edx
- psrld $30,%xmm3
- addl %esi,%ebx
movl %ecx,%esi
+ xorl %edi,%ebp
roll $5,%ecx
+ pslld $2,%xmm1
addl %ebp,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- por %xmm3,%xmm1
- movl %edx,%ebp
+ xorl %edx,%esi
+ psrld $30,%xmm3
xorl %edi,%edx
- movdqa 96(%esp),%xmm3
+ addl %ecx,%ebx
addl 24(%esp),%eax
- andl %edi,%ebp
andl %edx,%esi
+ xorl %edi,%edx
rorl $7,%ecx
- addl %ebp,%eax
- movdqa %xmm1,%xmm4
+ por %xmm3,%xmm1
movl %ebx,%ebp
+ xorl %edx,%esi
+ movdqa 96(%esp),%xmm3
roll $5,%ebx
addl %esi,%eax
- xorl %edi,%edx
- addl %ebx,%eax
- movl %ecx,%esi
+ xorl %ecx,%ebp
xorl %edx,%ecx
+ addl %ebx,%eax
+ pshufd $238,%xmm0,%xmm4
addl 28(%esp),%edi
- andl %edx,%esi
andl %ecx,%ebp
+ xorl %edx,%ecx
rorl $7,%ebx
- addl %esi,%edi
movl %eax,%esi
+ xorl %ecx,%ebp
roll $5,%eax
addl %ebp,%edi
- xorl %edx,%ecx
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
addl %eax,%edi
- movl %ebx,%ebp
+ addl 32(%esp),%edx
pxor %xmm6,%xmm2
-.byte 102,15,58,15,224,8
+ punpcklqdq %xmm1,%xmm4
+ andl %ebx,%esi
xorl %ecx,%ebx
- addl 32(%esp),%edx
- andl %ecx,%ebp
+ rorl $7,%eax
pxor %xmm3,%xmm2
movdqa %xmm6,96(%esp)
- andl %ebx,%esi
- rorl $7,%eax
- movdqa %xmm5,%xmm6
- paddd %xmm1,%xmm5
- addl %ebp,%edx
movl %edi,%ebp
- pxor %xmm4,%xmm2
+ xorl %ebx,%esi
roll $5,%edi
+ movdqa %xmm5,%xmm6
addl %esi,%edx
- xorl %ecx,%ebx
+ paddd %xmm1,%xmm5
+ xorl %eax,%ebp
+ pxor %xmm4,%xmm2
+ xorl %ebx,%eax
addl %edi,%edx
+ addl 36(%esp),%ecx
+ andl %eax,%ebp
movdqa %xmm2,%xmm4
movdqa %xmm5,16(%esp)
- movl %eax,%esi
xorl %ebx,%eax
- addl 36(%esp),%ecx
- andl %ebx,%esi
- pslld $2,%xmm2
- andl %eax,%ebp
rorl $7,%edi
- psrld $30,%xmm4
- addl %esi,%ecx
movl %edx,%esi
+ xorl %eax,%ebp
roll $5,%edx
+ pslld $2,%xmm2
addl %ebp,%ecx
- xorl %ebx,%eax
- addl %edx,%ecx
- por %xmm4,%xmm2
- movl %edi,%ebp
+ xorl %edi,%esi
+ psrld $30,%xmm4
xorl %eax,%edi
- movdqa 64(%esp),%xmm4
+ addl %edx,%ecx
addl 40(%esp),%ebx
- andl %eax,%ebp
andl %edi,%esi
+ xorl %eax,%edi
rorl $7,%edx
- addl %ebp,%ebx
- movdqa %xmm2,%xmm5
+ por %xmm4,%xmm2
movl %ecx,%ebp
+ xorl %edi,%esi
+ movdqa 64(%esp),%xmm4
roll $5,%ecx
addl %esi,%ebx
- xorl %eax,%edi
- addl %ecx,%ebx
- movl %edx,%esi
+ xorl %edx,%ebp
xorl %edi,%edx
+ addl %ecx,%ebx
+ pshufd $238,%xmm1,%xmm5
addl 44(%esp),%eax
- andl %edi,%esi
andl %edx,%ebp
+ xorl %edi,%edx
rorl $7,%ecx
- addl %esi,%eax
movl %ebx,%esi
+ xorl %edx,%ebp
roll $5,%ebx
addl %ebp,%eax
- xorl %edi,%edx
+ xorl %edx,%esi
addl %ebx,%eax
addl 48(%esp),%edi
pxor %xmm7,%xmm3
-.byte 102,15,58,15,233,8
- xorl %edx,%esi
+ punpcklqdq %xmm2,%xmm5
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
pxor %xmm4,%xmm3
movdqa %xmm7,64(%esp)
- xorl %ecx,%esi
- addl %eax,%edi
+ addl %esi,%edi
+ xorl %ecx,%ebp
movdqa %xmm6,%xmm7
- paddd %xmm2,%xmm6
rorl $7,%ebx
- addl %esi,%edi
+ paddd %xmm2,%xmm6
+ addl %eax,%edi
pxor %xmm5,%xmm3
addl 52(%esp),%edx
- xorl %ecx,%ebp
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
movdqa %xmm3,%xmm5
movdqa %xmm6,32(%esp)
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
addl %ebp,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
pslld $2,%xmm3
addl 56(%esp),%ecx
- xorl %ebx,%esi
+ xorl %eax,%esi
psrld $30,%xmm5
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
addl %esi,%ecx
+ xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
por %xmm5,%xmm3
addl 60(%esp),%ebx
- xorl %eax,%ebp
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
addl %ebp,%ebx
- addl (%esp),%eax
- paddd %xmm3,%xmm7
xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl (%esp),%eax
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
- movdqa %xmm7,48(%esp)
- addl %ebx,%eax
- rorl $7,%ecx
addl %esi,%eax
- addl 4(%esp),%edi
xorl %edx,%ebp
+ rorl $7,%ecx
+ paddd %xmm3,%xmm7
+ addl %ebx,%eax
+ addl 4(%esp),%edi
+ xorl %ecx,%ebp
movl %eax,%esi
+ movdqa %xmm7,48(%esp)
roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
addl %ebp,%edi
- addl 8(%esp),%edx
xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 8(%esp),%edx
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
addl %esi,%edx
- addl 12(%esp),%ecx
xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 12(%esp),%ecx
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
addl %ebp,%ecx
+ xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
movl 196(%esp),%ebp
cmpl 200(%esp),%ebp
- je .L005done
+ je .L007done
movdqa 160(%esp),%xmm7
movdqa 176(%esp),%xmm6
movdqu (%ebp),%xmm0
@@ -2388,113 +2542,112 @@ _sha1_block_data_order_ssse3:
movl %ebp,196(%esp)
movdqa %xmm7,96(%esp)
addl 16(%esp),%ebx
- xorl %eax,%esi
-.byte 102,15,56,0,206
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- paddd %xmm7,%xmm0
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
addl %esi,%ebx
- movdqa %xmm0,(%esp)
- addl 20(%esp),%eax
xorl %edi,%ebp
- psubd %xmm7,%xmm0
+ rorl $7,%edx
+.byte 102,15,56,0,206
+ addl %ecx,%ebx
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
movl %ebx,%esi
+ paddd %xmm7,%xmm0
roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
- addl 24(%esp),%edi
xorl %edx,%esi
+ rorl $7,%ecx
+ movdqa %xmm0,(%esp)
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
movl %eax,%ebp
+ psubd %xmm7,%xmm0
roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
addl %esi,%edi
- addl 28(%esp),%edx
xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
addl %ebp,%edx
- addl 32(%esp),%ecx
xorl %ebx,%esi
-.byte 102,15,56,0,214
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- paddd %xmm7,%xmm1
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
addl %esi,%ecx
- movdqa %xmm1,16(%esp)
- addl 36(%esp),%ebx
xorl %eax,%ebp
- psubd %xmm7,%xmm1
+ rorl $7,%edi
+.byte 102,15,56,0,214
+ addl %edx,%ecx
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
movl %ecx,%esi
+ paddd %xmm7,%xmm1
roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
addl %ebp,%ebx
- addl 40(%esp),%eax
xorl %edi,%esi
+ rorl $7,%edx
+ movdqa %xmm1,16(%esp)
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
movl %ebx,%ebp
+ psubd %xmm7,%xmm1
roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
addl %esi,%eax
- addl 44(%esp),%edi
xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
addl %ebp,%edi
- addl 48(%esp),%edx
xorl %ecx,%esi
-.byte 102,15,56,0,222
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- paddd %xmm7,%xmm2
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
addl %esi,%edx
- movdqa %xmm2,32(%esp)
- addl 52(%esp),%ecx
xorl %ebx,%ebp
- psubd %xmm7,%xmm2
+ rorl $7,%eax
+.byte 102,15,56,0,222
+ addl %edi,%edx
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
movl %edx,%esi
+ paddd %xmm7,%xmm2
roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
addl %ebp,%ecx
- addl 56(%esp),%ebx
xorl %eax,%esi
+ rorl $7,%edi
+ movdqa %xmm2,32(%esp)
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
movl %ecx,%ebp
+ psubd %xmm7,%xmm2
roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
addl %esi,%ebx
- addl 60(%esp),%eax
xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
addl 4(%ebp),%esi
@@ -2504,109 +2657,112 @@ _sha1_block_data_order_ssse3:
movl %esi,4(%ebp)
addl 16(%ebp),%edi
movl %ecx,8(%ebp)
- movl %esi,%ebx
+ movl %ecx,%ebx
movl %edx,12(%ebp)
+ xorl %edx,%ebx
movl %edi,16(%ebp)
- movdqa %xmm1,%xmm4
- jmp .L004loop
+ movl %esi,%ebp
+ pshufd $238,%xmm0,%xmm4
+ andl %ebx,%esi
+ movl %ebp,%ebx
+ jmp .L006loop
.align 16
-.L005done:
+.L007done:
addl 16(%esp),%ebx
- xorl %eax,%esi
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
addl %esi,%ebx
- addl 20(%esp),%eax
xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%esp),%eax
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
- addl 24(%esp),%edi
xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%esp),%edi
+ xorl %ecx,%esi
movl %eax,%ebp
roll $5,%eax
- xorl %ecx,%esi
- addl %eax,%edi
- rorl $7,%ebx
addl %esi,%edi
- addl 28(%esp),%edx
xorl %ecx,%ebp
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 28(%esp),%edx
+ xorl %ebx,%ebp
movl %edi,%esi
roll $5,%edi
- xorl %ebx,%ebp
- addl %edi,%edx
- rorl $7,%eax
addl %ebp,%edx
- addl 32(%esp),%ecx
xorl %ebx,%esi
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 32(%esp),%ecx
+ xorl %eax,%esi
movl %edx,%ebp
roll $5,%edx
- xorl %eax,%esi
- addl %edx,%ecx
- rorl $7,%edi
addl %esi,%ecx
- addl 36(%esp),%ebx
xorl %eax,%ebp
+ rorl $7,%edi
+ addl %edx,%ecx
+ addl 36(%esp),%ebx
+ xorl %edi,%ebp
movl %ecx,%esi
roll $5,%ecx
- xorl %edi,%ebp
- addl %ecx,%ebx
- rorl $7,%edx
addl %ebp,%ebx
- addl 40(%esp),%eax
xorl %edi,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%esp),%eax
+ xorl %edx,%esi
movl %ebx,%ebp
roll $5,%ebx
- xorl %edx,%esi
- addl %ebx,%eax
- rorl $7,%ecx
addl %esi,%eax
- addl 44(%esp),%edi
xorl %edx,%ebp
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%esp),%edi
+ xorl %ecx,%ebp
movl %eax,%esi
roll $5,%eax
- xorl %ecx,%ebp
- addl %eax,%edi
- rorl $7,%ebx
addl %ebp,%edi
- addl 48(%esp),%edx
xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%edi
+ addl 48(%esp),%edx
+ xorl %ebx,%esi
movl %edi,%ebp
roll $5,%edi
- xorl %ebx,%esi
- addl %edi,%edx
- rorl $7,%eax
addl %esi,%edx
- addl 52(%esp),%ecx
xorl %ebx,%ebp
+ rorl $7,%eax
+ addl %edi,%edx
+ addl 52(%esp),%ecx
+ xorl %eax,%ebp
movl %edx,%esi
roll $5,%edx
- xorl %eax,%ebp
- addl %edx,%ecx
- rorl $7,%edi
addl %ebp,%ecx
- addl 56(%esp),%ebx
xorl %eax,%esi
+ rorl $7,%edi
+ addl %edx,%ecx
+ addl 56(%esp),%ebx
+ xorl %edi,%esi
movl %ecx,%ebp
roll $5,%ecx
- xorl %edi,%esi
- addl %ecx,%ebx
- rorl $7,%edx
addl %esi,%ebx
- addl 60(%esp),%eax
xorl %edi,%ebp
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%esp),%eax
+ xorl %edx,%ebp
movl %ebx,%esi
roll $5,%ebx
- xorl %edx,%ebp
- addl %ebx,%eax
- rorl $7,%ecx
addl %ebp,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
movl 192(%esp),%ebp
addl (%ebp),%eax
movl 204(%esp),%esp
@@ -2632,8 +2788,9 @@ _sha1_block_data_order_ssse3:
.long 2400959708,2400959708,2400959708,2400959708
.long 3395469782,3395469782,3395469782,3395469782
.long 66051,67438087,134810123,202182159
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/sha256-586.s b/secure/lib/libcrypto/i386/sha256-586.s
index 7ea3748..a3b82f8 100644
--- a/secure/lib/libcrypto/i386/sha256-586.s
+++ b/secure/lib/libcrypto/i386/sha256-586.s
@@ -26,234 +26,4553 @@ sha256_block_data_order:
movl %edi,4(%esp)
movl %eax,8(%esp)
movl %ebx,12(%esp)
+ leal OPENSSL_ia32cap_P,%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ movl 8(%edx),%edx
+ testl $16777216,%ecx
+ jz .L003no_xmm
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ testl $536870912,%edx
+ jnz .L004shaext
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ testl $512,%ebx
+ jnz .L005SSSE3
+.L003no_xmm:
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L006unrolled
+ jmp .L002loop
.align 16
.L002loop:
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
- movl 12(%edi),%edx
bswap %eax
+ movl 12(%edi),%edx
bswap %ebx
- bswap %ecx
- bswap %edx
pushl %eax
+ bswap %ecx
pushl %ebx
+ bswap %edx
pushl %ecx
pushl %edx
movl 16(%edi),%eax
movl 20(%edi),%ebx
movl 24(%edi),%ecx
- movl 28(%edi),%edx
bswap %eax
+ movl 28(%edi),%edx
bswap %ebx
- bswap %ecx
- bswap %edx
pushl %eax
+ bswap %ecx
pushl %ebx
+ bswap %edx
pushl %ecx
pushl %edx
movl 32(%edi),%eax
movl 36(%edi),%ebx
movl 40(%edi),%ecx
- movl 44(%edi),%edx
bswap %eax
+ movl 44(%edi),%edx
bswap %ebx
- bswap %ecx
- bswap %edx
pushl %eax
+ bswap %ecx
pushl %ebx
+ bswap %edx
pushl %ecx
pushl %edx
movl 48(%edi),%eax
movl 52(%edi),%ebx
movl 56(%edi),%ecx
- movl 60(%edi),%edx
bswap %eax
+ movl 60(%edi),%edx
bswap %ebx
- bswap %ecx
- bswap %edx
pushl %eax
+ bswap %ecx
pushl %ebx
+ bswap %edx
pushl %ecx
pushl %edx
addl $64,%edi
- subl $32,%esp
- movl %edi,100(%esp)
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edi
- movl %ebx,4(%esp)
- movl %ecx,8(%esp)
- movl %edi,12(%esp)
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
movl 16(%esi),%edx
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edi
- movl %ebx,20(%esp)
- movl %ecx,24(%esp)
- movl %edi,28(%esp)
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
.align 16
-.L00300_15:
- movl 92(%esp),%ebx
+.L00700_15:
movl %edx,%ecx
+ movl 24(%esp),%esi
rorl $14,%ecx
- movl 20(%esp),%esi
+ movl 28(%esp),%edi
xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
rorl $5,%ecx
- xorl %edx,%ecx
- rorl $6,%ecx
- movl 24(%esp),%edi
- addl %ecx,%ebx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
xorl %edi,%esi
- movl %edx,16(%esp)
+ rorl $6,%edx
movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00700_15
+ movl 156(%esp),%ecx
+ jmp .L00816_63
+.align 16
+.L00816_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
andl %edx,%esi
- movl 12(%esp),%edx
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
xorl %edi,%esi
- movl %eax,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
addl %esi,%ebx
rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00816_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
+.align 16
+.L006unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp .L009grand_loop
+.align 16
+.L009grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3248222580,%esi
- jne .L00300_15
- movl 152(%esp),%ebx
-.align 16
-.L00416_63:
- movl %ebx,%esi
- movl 100(%esp),%ecx
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
rorl $11,%esi
movl %ecx,%edi
+ rorl $2,%ecx
xorl %ebx,%esi
+ shrl $3,%ebx
rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
shrl $3,%ebx
- rorl $2,%edi
+ rorl $7,%esi
+ xorl %edi,%ecx
xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
xorl %ecx,%edi
- rorl $17,%edi
- shrl $10,%ecx
- addl 156(%esp),%ebx
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
xorl %ecx,%edi
- addl 120(%esp),%ebx
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
addl %edi,%ebx
- rorl $14,%ecx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 20(%esp),%esi
- xorl %edx,%ecx
- rorl $5,%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
movl %ebx,92(%esp)
- xorl %edx,%ecx
- rorl $6,%ecx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
movl 24(%esp),%edi
- addl %ecx,%ebx
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
xorl %edi,%esi
- movl %edx,16(%esp)
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- andl %edx,%esi
- movl 12(%esp),%edx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
xorl %edi,%esi
- movl %eax,%edi
- addl %esi,%ebx
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
rorl $2,%ecx
- addl %ebx,%edx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
movl 8(%esp),%edi
- addl %ecx,%ebx
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
movl %eax,%ecx
- subl $4,%esp
- orl %esi,%eax
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
andl %esi,%ecx
- andl %edi,%eax
- movl (%ebp),%esi
- orl %ecx,%eax
- addl $4,%ebp
- addl %ebx,%eax
- movl 152(%esp),%ebx
- addl %esi,%edx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
addl %esi,%eax
- cmpl $3329325298,%esi
- jne .L00416_63
- movl 352(%esp),%esi
- movl 4(%esp),%ebx
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
addl (%esi),%eax
- addl 4(%esi),%ebx
- addl 8(%esi),%ecx
- addl 12(%esi),%edi
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
movl %eax,(%esi)
- movl %ebx,4(%esi)
- movl %ecx,8(%esi)
- movl %edi,12(%esi)
- movl 20(%esp),%eax
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
movl 24(%esp),%ebx
movl 28(%esp),%ecx
- movl 356(%esp),%edi
addl 16(%esi),%edx
- addl 20(%esi),%eax
+ addl 20(%esi),%edi
addl 24(%esi),%ebx
addl 28(%esi),%ecx
movl %edx,16(%esi)
- movl %eax,20(%esi)
+ movl %edi,20(%esi)
movl %ebx,24(%esi)
movl %ecx,28(%esi)
- addl $352,%esp
- subl $256,%ebp
- cmpl 8(%esp),%edi
- jb .L002loop
- movl 12(%esp),%esp
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ cmpl 104(%esp),%edi
+ jb .L009grand_loop
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L004shaext:
+ subl $32,%esp
+ movdqu (%esi),%xmm1
+ leal 128(%ebp),%ebp
+ movdqu 16(%esi),%xmm2
+ movdqa 128(%ebp),%xmm7
+ pshufd $27,%xmm1,%xmm0
+ pshufd $177,%xmm1,%xmm1
+ pshufd $27,%xmm2,%xmm2
+.byte 102,15,58,15,202,8
+ punpcklqdq %xmm0,%xmm2
+ jmp .L010loop_shaext
+.align 16
+.L010loop_shaext:
+ movdqu (%edi),%xmm3
+ movdqu 16(%edi),%xmm4
+ movdqu 32(%edi),%xmm5
+.byte 102,15,56,0,223
+ movdqu 48(%edi),%xmm6
+ movdqa %xmm2,16(%esp)
+ movdqa -128(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 102,15,56,0,231
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ nop
+ movdqa %xmm1,(%esp)
+.byte 15,56,203,202
+ movdqa -112(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 102,15,56,0,239
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ leal 64(%edi),%edi
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 102,15,56,0,247
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -80(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa -64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa -48(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa -32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa -16(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa (%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 16(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+ nop
+ paddd %xmm7,%xmm6
+.byte 15,56,204,220
+.byte 15,56,203,202
+ movdqa 32(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,205,245
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm6,%xmm7
+.byte 102,15,58,15,253,4
+ nop
+ paddd %xmm7,%xmm3
+.byte 15,56,204,229
+.byte 15,56,203,202
+ movdqa 48(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+.byte 15,56,205,222
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm3,%xmm7
+.byte 102,15,58,15,254,4
+ nop
+ paddd %xmm7,%xmm4
+.byte 15,56,204,238
+.byte 15,56,203,202
+ movdqa 64(%ebp),%xmm0
+ paddd %xmm3,%xmm0
+.byte 15,56,205,227
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm4,%xmm7
+.byte 102,15,58,15,251,4
+ nop
+ paddd %xmm7,%xmm5
+.byte 15,56,204,243
+.byte 15,56,203,202
+ movdqa 80(%ebp),%xmm0
+ paddd %xmm4,%xmm0
+.byte 15,56,205,236
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ movdqa %xmm5,%xmm7
+.byte 102,15,58,15,252,4
+.byte 15,56,203,202
+ paddd %xmm7,%xmm6
+ movdqa 96(%ebp),%xmm0
+ paddd %xmm5,%xmm0
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+.byte 15,56,205,245
+ movdqa 128(%ebp),%xmm7
+.byte 15,56,203,202
+ movdqa 112(%ebp),%xmm0
+ paddd %xmm6,%xmm0
+ nop
+.byte 15,56,203,209
+ pshufd $14,%xmm0,%xmm0
+ cmpl %edi,%eax
+ nop
+.byte 15,56,203,202
+ paddd 16(%esp),%xmm2
+ paddd (%esp),%xmm1
+ jnz .L010loop_shaext
+ pshufd $177,%xmm2,%xmm2
+ pshufd $27,%xmm1,%xmm7
+ pshufd $177,%xmm1,%xmm1
+ punpckhqdq %xmm2,%xmm1
+.byte 102,15,58,15,215,8
+ movl 44(%esp),%esp
+ movdqu %xmm1,(%esi)
+ movdqu %xmm2,16(%esi)
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L005SSSE3:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,4(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,8(%esp)
+ movl %edi,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%edi
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ movdqa 256(%ebp),%xmm7
+ jmp .L011grand_ssse3
+.align 16
+.L011grand_ssse3:
+ movdqu (%edi),%xmm0
+ movdqu 16(%edi),%xmm1
+ movdqu 32(%edi),%xmm2
+ movdqu 48(%edi),%xmm3
+ addl $64,%edi
+.byte 102,15,56,0,199
+ movl %edi,100(%esp)
+.byte 102,15,56,0,207
+ movdqa (%ebp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 16(%ebp),%xmm5
+ paddd %xmm0,%xmm4
+.byte 102,15,56,0,223
+ movdqa 32(%ebp),%xmm6
+ paddd %xmm1,%xmm5
+ movdqa 48(%ebp),%xmm7
+ movdqa %xmm4,32(%esp)
+ paddd %xmm2,%xmm6
+ movdqa %xmm5,48(%esp)
+ paddd %xmm3,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ jmp .L012ssse3_00_47
+.align 16
+.L012ssse3_00_47:
+ addl $64,%ebp
+ movl %edx,%ecx
+ movdqa %xmm1,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm3,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,224,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,250,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm3,%xmm7
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm0
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm0
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm0,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa (%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm0
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm0,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,32(%esp)
+ movl %edx,%ecx
+ movdqa %xmm2,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm0,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,225,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,251,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm0,%xmm7
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm1
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm1
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm1,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 16(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm1
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm1,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,48(%esp)
+ movl %edx,%ecx
+ movdqa %xmm3,%xmm4
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ movdqa %xmm1,%xmm7
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+.byte 102,15,58,15,226,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,248,4
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm1,%xmm7
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm2
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm2
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ pshufd $80,%xmm2,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 32(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm2
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ paddd %xmm2,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,64(%esp)
+ movl %edx,%ecx
+ movdqa %xmm0,%xmm4
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ movdqa %xmm2,%xmm7
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+.byte 102,15,58,15,227,4
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+.byte 102,15,58,15,249,4
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ movdqa %xmm4,%xmm5
+ rorl $6,%edx
+ movl %eax,%ecx
+ movdqa %xmm4,%xmm6
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ psrld $3,%xmm4
+ movl %eax,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ psrld $7,%xmm6
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ pshufd $250,%xmm2,%xmm7
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ pslld $14,%xmm5
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm4
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ psrld $11,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm5,%xmm4
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ pslld $11,%xmm5
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ pxor %xmm6,%xmm4
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ movdqa %xmm7,%xmm6
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ pxor %xmm5,%xmm4
+ movl %ebx,%ecx
+ addl %edi,%edx
+ psrld $10,%xmm7
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm4,%xmm3
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ psrlq $17,%xmm6
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ pxor %xmm6,%xmm7
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ psrlq $2,%xmm6
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ pshufd $128,%xmm7,%xmm7
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ psrldq $8,%xmm7
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ paddd %xmm7,%xmm3
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ pshufd $80,%xmm3,%xmm7
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ movdqa %xmm7,%xmm6
+ rorl $11,%ecx
+ psrld $10,%xmm7
+ andl %eax,%ebx
+ psrlq $17,%xmm6
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ pxor %xmm6,%xmm7
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ psrlq $2,%xmm6
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ pxor %xmm6,%xmm7
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ pshufd $8,%xmm7,%xmm7
+ xorl %edi,%esi
+ rorl $5,%edx
+ movdqa 48(%ebp),%xmm6
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ pslldq $8,%xmm7
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ paddd %xmm7,%xmm3
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ paddd %xmm3,%xmm6
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movdqa %xmm6,80(%esp)
+ cmpl $66051,64(%ebp)
+ jne .L012ssse3_00_47
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 32(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 36(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 40(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 44(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 48(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 52(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 56(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 60(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 20(%esp),%esi
+ xorl %ecx,%edx
+ movl 24(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 4(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 28(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 64(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 12(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 16(%esp),%esi
+ xorl %ecx,%edx
+ movl 20(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,12(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl (%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,28(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 24(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 68(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 12(%esp),%esi
+ xorl %ecx,%edx
+ movl 16(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 28(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,24(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 20(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 72(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 4(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 8(%esp),%esi
+ xorl %ecx,%edx
+ movl 12(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,4(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 24(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,20(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 16(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 76(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 4(%esp),%esi
+ xorl %ecx,%edx
+ movl 8(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 20(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,16(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 12(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 80(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 28(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl (%esp),%esi
+ xorl %ecx,%edx
+ movl 4(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,28(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 16(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,12(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl 8(%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 84(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %ecx,%eax
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 28(%esp),%esi
+ xorl %ecx,%edx
+ movl (%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%edx
+ movl 12(%esp),%edi
+ movl %eax,%esi
+ rorl $9,%ecx
+ movl %eax,8(%esp)
+ xorl %eax,%ecx
+ xorl %edi,%eax
+ addl 4(%esp),%edx
+ rorl $11,%ecx
+ andl %eax,%ebx
+ xorl %esi,%ecx
+ addl 88(%esp),%edx
+ xorl %edi,%ebx
+ rorl $2,%ecx
+ addl %edx,%ebx
+ addl 20(%esp),%edx
+ addl %ecx,%ebx
+ movl %edx,%ecx
+ rorl $14,%edx
+ movl 24(%esp),%esi
+ xorl %ecx,%edx
+ movl 28(%esp),%edi
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,20(%esp)
+ xorl %ecx,%edx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %ebx,%ecx
+ addl %edi,%edx
+ movl 8(%esp),%edi
+ movl %ebx,%esi
+ rorl $9,%ecx
+ movl %ebx,4(%esp)
+ xorl %ebx,%ecx
+ xorl %edi,%ebx
+ addl (%esp),%edx
+ rorl $11,%ecx
+ andl %ebx,%eax
+ xorl %esi,%ecx
+ addl 92(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %ecx,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebx
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebx,4(%esp)
+ xorl %edi,%ebx
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %edi,20(%esp)
+ movl 28(%esp),%edi
+ movl %ecx,24(%esi)
+ addl 28(%esi),%edi
+ movl %ecx,24(%esp)
+ movl %edi,28(%esi)
+ movl %edi,28(%esp)
+ movl 100(%esp),%edi
+ movdqa 64(%ebp),%xmm7
+ subl $192,%ebp
+ cmpl 104(%esp),%edi
+ jb .L011grand_ssse3
+ movl 108(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
-.align 64
-.L001K256:
-.long 1116352408,1899447441,3049323471,3921009573
-.long 961987163,1508970993,2453635748,2870763221
-.long 3624381080,310598401,607225278,1426881987
-.long 1925078388,2162078206,2614888103,3248222580
-.long 3835390401,4022224774,264347078,604807628
-.long 770255983,1249150122,1555081692,1996064986
-.long 2554220882,2821834349,2952996808,3210313671
-.long 3336571891,3584528711,113926993,338241895
-.long 666307205,773529912,1294757372,1396182291
-.long 1695183700,1986661051,2177026350,2456956037
-.long 2730485921,2820302411,3259730800,3345764771
-.long 3516065817,3600352804,4094571909,275423344
-.long 430227734,506948616,659060556,883997877
-.long 958139571,1322822218,1537002063,1747873779
-.long 1955562222,2024104815,2227730452,2361852424
-.long 2428436474,2756734187,3204031479,3329325298
.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
-.byte 62,0
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/sha512-586.s b/secure/lib/libcrypto/i386/sha512-586.s
index a37f850..2dc6f1a 100644
--- a/secure/lib/libcrypto/i386/sha512-586.s
+++ b/secure/lib/libcrypto/i386/sha512-586.s
@@ -27,249 +27,2243 @@ sha512_block_data_order:
movl %eax,8(%esp)
movl %ebx,12(%esp)
leal OPENSSL_ia32cap_P,%edx
- btl $26,(%edx)
- jnc .L002loop_x86
+ movl (%edx),%ecx
+ testl $67108864,%ecx
+ jz .L002loop_x86
+ movl 4(%edx),%edx
movq (%esi),%mm0
+ andl $16777216,%ecx
movq 8(%esi),%mm1
+ andl $512,%edx
movq 16(%esi),%mm2
+ orl %edx,%ecx
movq 24(%esi),%mm3
movq 32(%esi),%mm4
movq 40(%esi),%mm5
movq 48(%esi),%mm6
movq 56(%esi),%mm7
+ cmpl $16777728,%ecx
+ je .L003SSSE3
subl $80,%esp
+ jmp .L004loop_sse2
.align 16
-.L003loop_sse2:
+.L004loop_sse2:
movq %mm1,8(%esp)
movq %mm2,16(%esp)
movq %mm3,24(%esp)
movq %mm5,40(%esp)
movq %mm6,48(%esp)
+ pxor %mm1,%mm2
movq %mm7,56(%esp)
- movl (%edi),%ecx
- movl 4(%edi),%edx
+ movq %mm0,%mm3
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
addl $8,%edi
- bswap %ecx
- bswap %edx
- movl %ecx,76(%esp)
- movl %edx,72(%esp)
+ movl $15,%edx
+ bswap %eax
+ bswap %ebx
+ jmp .L00500_14_sse2
.align 16
-.L00400_14_sse2:
+.L00500_14_sse2:
+ movd %eax,%mm1
movl (%edi),%eax
+ movd %ebx,%mm7
movl 4(%edi),%ebx
addl $8,%edi
bswap %eax
bswap %ebx
- movl %eax,68(%esp)
- movl %ebx,64(%esp)
+ punpckldq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ decl %edx
+ jnz .L00500_14_sse2
+ movd %eax,%mm1
+ movd %ebx,%mm7
+ punpckldq %mm1,%mm7
movq %mm4,%mm1
- movq %mm4,%mm2
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm3,%mm0
+ movq %mm7,72(%esp)
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
+ psllq $4,%mm4
paddq (%ebp),%mm7
- pxor %mm2,%mm3
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm2,%mm3
+ movq %mm0,%mm2
+ addl $8,%ebp
+ paddq %mm6,%mm3
+ pxor %mm0,%mm0
+ movl $32,%edx
+ jmp .L00616_79_sse2
+.align 16
+.L00616_79_sse2:
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm0
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm2
+ addl $8,%ebp
+ movq 88(%esp),%mm5
+ movq %mm7,%mm1
+ psrlq $1,%mm7
+ movq %mm5,%mm6
+ psrlq $6,%mm5
+ psllq $56,%mm1
+ paddq %mm3,%mm2
+ movq %mm7,%mm3
+ psrlq $6,%mm7
+ pxor %mm1,%mm3
+ psllq $7,%mm1
+ pxor %mm7,%mm3
+ psrlq $1,%mm7
+ pxor %mm1,%mm3
+ movq %mm5,%mm1
+ psrlq $13,%mm5
+ pxor %mm3,%mm7
+ psllq $3,%mm6
+ pxor %mm5,%mm1
+ paddq 200(%esp),%mm7
+ pxor %mm6,%mm1
+ psrlq $42,%mm5
+ paddq 128(%esp),%mm7
+ pxor %mm5,%mm1
+ psllq $42,%mm6
+ movq 40(%esp),%mm5
+ pxor %mm6,%mm1
+ movq 48(%esp),%mm6
+ paddq %mm1,%mm7
+ movq %mm4,%mm1
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ movq %mm7,72(%esp)
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ paddq (%ebp),%mm7
+ pxor %mm4,%mm3
movq 24(%esp),%mm4
- paddq %mm5,%mm3
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ subl $8,%esp
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 192(%esp),%mm7
+ paddq %mm6,%mm0
+ addl $8,%ebp
+ decl %edx
+ jnz .L00616_79_sse2
+ paddq %mm3,%mm0
+ movq 8(%esp),%mm1
+ movq 24(%esp),%mm3
+ movq 40(%esp),%mm5
+ movq 48(%esp),%mm6
+ movq 56(%esp),%mm7
+ pxor %mm1,%mm2
+ paddq (%esi),%mm0
+ paddq 8(%esi),%mm1
+ paddq 16(%esi),%mm2
+ paddq 24(%esi),%mm3
+ paddq 32(%esi),%mm4
+ paddq 40(%esi),%mm5
+ paddq 48(%esi),%mm6
+ paddq 56(%esi),%mm7
+ movl $640,%eax
+ movq %mm0,(%esi)
+ movq %mm1,8(%esi)
+ movq %mm2,16(%esi)
+ movq %mm3,24(%esi)
+ movq %mm4,32(%esi)
+ movq %mm5,40(%esi)
+ movq %mm6,48(%esi)
+ movq %mm7,56(%esi)
+ leal (%esp,%eax,1),%esp
+ subl %eax,%ebp
+ cmpl 88(%esp),%edi
+ jb .L004loop_sse2
+ movl 92(%esp),%esp
+ emms
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 32
+.L003SSSE3:
+ leal -64(%esp),%edx
+ subl $256,%esp
+ movdqa 640(%ebp),%xmm1
+ movdqu (%edi),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%edi),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%edi),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%edi),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%edi),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%edi),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%edi),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%edi),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movdqa %xmm2,-16(%edx)
+ nop
+.align 32
+.L007loop_ssse3:
+ movdqa 16(%edx),%xmm2
+ movdqa %xmm3,48(%edx)
+ leal 128(%ebp),%ebp
+ movq %mm1,8(%esp)
+ movl %edi,%ebx
+ movq %mm2,16(%esp)
+ leal 128(%edi),%edi
+ movq %mm3,24(%esp)
+ cmpl %eax,%edi
+ movq %mm5,40(%esp)
+ cmovbl %edi,%ebx
+ movq %mm6,48(%esp)
+ movl $4,%ecx
+ pxor %mm1,%mm2
+ movq %mm7,56(%esp)
+ pxor %mm3,%mm3
+ jmp .L00800_47_ssse3
+.align 32
+.L00800_47_ssse3:
+ movdqa %xmm5,%xmm3
+ movdqa %xmm2,%xmm1
+.byte 102,15,58,15,208,8
+ movdqa %xmm4,(%edx)
+.byte 102,15,58,15,220,8
+ movdqa %xmm2,%xmm4
+ psrlq $7,%xmm2
+ paddq %xmm3,%xmm0
+ movdqa %xmm4,%xmm3
+ psrlq $1,%xmm4
+ psllq $56,%xmm3
+ pxor %xmm4,%xmm2
+ psrlq $7,%xmm4
+ pxor %xmm3,%xmm2
+ psllq $7,%xmm3
+ pxor %xmm4,%xmm2
+ movdqa %xmm7,%xmm4
+ pxor %xmm3,%xmm2
+ movdqa %xmm7,%xmm3
+ psrlq $6,%xmm4
+ paddq %xmm2,%xmm0
+ movdqa %xmm7,%xmm2
+ psrlq $19,%xmm3
+ psllq $3,%xmm2
+ pxor %xmm3,%xmm4
+ psrlq $42,%xmm3
+ pxor %xmm2,%xmm4
+ psllq $42,%xmm2
+ pxor %xmm3,%xmm4
+ movdqa 32(%edx),%xmm3
+ pxor %xmm2,%xmm4
+ movdqa (%ebp),%xmm2
+ movq %mm4,%mm1
+ paddq %xmm4,%xmm0
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ paddq %xmm0,%xmm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm2,-128(%edx)
+ movdqa %xmm6,%xmm4
+ movdqa %xmm3,%xmm2
+.byte 102,15,58,15,217,8
+ movdqa %xmm5,16(%edx)
+.byte 102,15,58,15,229,8
+ movdqa %xmm3,%xmm5
+ psrlq $7,%xmm3
+ paddq %xmm4,%xmm1
+ movdqa %xmm5,%xmm4
+ psrlq $1,%xmm5
+ psllq $56,%xmm4
+ pxor %xmm5,%xmm3
+ psrlq $7,%xmm5
+ pxor %xmm4,%xmm3
+ psllq $7,%xmm4
+ pxor %xmm5,%xmm3
+ movdqa %xmm0,%xmm5
+ pxor %xmm4,%xmm3
+ movdqa %xmm0,%xmm4
+ psrlq $6,%xmm5
+ paddq %xmm3,%xmm1
+ movdqa %xmm0,%xmm3
+ psrlq $19,%xmm4
+ psllq $3,%xmm3
+ pxor %xmm4,%xmm5
+ psrlq $42,%xmm4
+ pxor %xmm3,%xmm5
+ psllq $42,%xmm3
+ pxor %xmm4,%xmm5
+ movdqa 48(%edx),%xmm4
+ pxor %xmm3,%xmm5
+ movdqa 16(%ebp),%xmm3
+ movq %mm4,%mm1
+ paddq %xmm5,%xmm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm1,%xmm3
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm3,-112(%edx)
+ movdqa %xmm7,%xmm5
+ movdqa %xmm4,%xmm3
+.byte 102,15,58,15,226,8
+ movdqa %xmm6,32(%edx)
+.byte 102,15,58,15,238,8
+ movdqa %xmm4,%xmm6
+ psrlq $7,%xmm4
+ paddq %xmm5,%xmm2
+ movdqa %xmm6,%xmm5
+ psrlq $1,%xmm6
+ psllq $56,%xmm5
+ pxor %xmm6,%xmm4
+ psrlq $7,%xmm6
+ pxor %xmm5,%xmm4
+ psllq $7,%xmm5
+ pxor %xmm6,%xmm4
+ movdqa %xmm1,%xmm6
+ pxor %xmm5,%xmm4
+ movdqa %xmm1,%xmm5
+ psrlq $6,%xmm6
+ paddq %xmm4,%xmm2
+ movdqa %xmm1,%xmm4
+ psrlq $19,%xmm5
+ psllq $3,%xmm4
+ pxor %xmm5,%xmm6
+ psrlq $42,%xmm5
+ pxor %xmm4,%xmm6
+ psllq $42,%xmm4
+ pxor %xmm5,%xmm6
+ movdqa (%edx),%xmm5
+ pxor %xmm4,%xmm6
+ movdqa 32(%ebp),%xmm4
+ movq %mm4,%mm1
+ paddq %xmm6,%xmm2
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm2,%xmm4
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm4,-96(%edx)
+ movdqa %xmm0,%xmm6
+ movdqa %xmm5,%xmm4
+.byte 102,15,58,15,235,8
+ movdqa %xmm7,48(%edx)
+.byte 102,15,58,15,247,8
+ movdqa %xmm5,%xmm7
+ psrlq $7,%xmm5
+ paddq %xmm6,%xmm3
+ movdqa %xmm7,%xmm6
+ psrlq $1,%xmm7
+ psllq $56,%xmm6
+ pxor %xmm7,%xmm5
+ psrlq $7,%xmm7
+ pxor %xmm6,%xmm5
+ psllq $7,%xmm6
+ pxor %xmm7,%xmm5
+ movdqa %xmm2,%xmm7
+ pxor %xmm6,%xmm5
+ movdqa %xmm2,%xmm6
+ psrlq $6,%xmm7
+ paddq %xmm5,%xmm3
+ movdqa %xmm2,%xmm5
+ psrlq $19,%xmm6
+ psllq $3,%xmm5
+ pxor %xmm6,%xmm7
+ psrlq $42,%xmm6
+ pxor %xmm5,%xmm7
+ psllq $42,%xmm5
+ pxor %xmm6,%xmm7
+ movdqa 16(%edx),%xmm6
+ pxor %xmm5,%xmm7
+ movdqa 48(%ebp),%xmm5
+ movq %mm4,%mm1
+ paddq %xmm7,%xmm3
+ movq -80(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm3,%xmm5
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- pand %mm2,%mm5
- pand %mm1,%mm0
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $53,%dl
- jne .L00400_14_sse2
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm5,-80(%edx)
+ movdqa %xmm1,%xmm7
+ movdqa %xmm6,%xmm5
+.byte 102,15,58,15,244,8
+ movdqa %xmm0,(%edx)
+.byte 102,15,58,15,248,8
+ movdqa %xmm6,%xmm0
+ psrlq $7,%xmm6
+ paddq %xmm7,%xmm4
+ movdqa %xmm0,%xmm7
+ psrlq $1,%xmm0
+ psllq $56,%xmm7
+ pxor %xmm0,%xmm6
+ psrlq $7,%xmm0
+ pxor %xmm7,%xmm6
+ psllq $7,%xmm7
+ pxor %xmm0,%xmm6
+ movdqa %xmm3,%xmm0
+ pxor %xmm7,%xmm6
+ movdqa %xmm3,%xmm7
+ psrlq $6,%xmm0
+ paddq %xmm6,%xmm4
+ movdqa %xmm3,%xmm6
+ psrlq $19,%xmm7
+ psllq $3,%xmm6
+ pxor %xmm7,%xmm0
+ psrlq $42,%xmm7
+ pxor %xmm6,%xmm0
+ psllq $42,%xmm6
+ pxor %xmm7,%xmm0
+ movdqa 32(%edx),%xmm7
+ pxor %xmm6,%xmm0
+ movdqa 64(%ebp),%xmm6
movq %mm4,%mm1
- movq %mm4,%mm2
+ paddq %xmm0,%xmm4
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ paddq %xmm4,%xmm6
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
- pxor %mm6,%mm5
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm6,-64(%edx)
+ movdqa %xmm2,%xmm0
+ movdqa %xmm7,%xmm6
+.byte 102,15,58,15,253,8
+ movdqa %xmm1,16(%edx)
+.byte 102,15,58,15,193,8
+ movdqa %xmm7,%xmm1
+ psrlq $7,%xmm7
+ paddq %xmm0,%xmm5
+ movdqa %xmm1,%xmm0
+ psrlq $1,%xmm1
+ psllq $56,%xmm0
+ pxor %xmm1,%xmm7
+ psrlq $7,%xmm1
+ pxor %xmm0,%xmm7
+ psllq $7,%xmm0
+ pxor %xmm1,%xmm7
+ movdqa %xmm4,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm4,%xmm0
+ psrlq $6,%xmm1
+ paddq %xmm7,%xmm5
+ movdqa %xmm4,%xmm7
+ psrlq $19,%xmm0
+ psllq $3,%xmm7
+ pxor %xmm0,%xmm1
+ psrlq $42,%xmm0
+ pxor %xmm7,%xmm1
+ psllq $42,%xmm7
+ pxor %xmm0,%xmm1
+ movdqa 48(%edx),%xmm0
+ pxor %xmm7,%xmm1
+ movdqa 80(%ebp),%xmm7
+ movq %mm4,%mm1
+ paddq %xmm1,%xmm5
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ paddq %xmm5,%xmm7
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm7,-48(%edx)
+ movdqa %xmm3,%xmm1
+ movdqa %xmm0,%xmm7
+.byte 102,15,58,15,198,8
+ movdqa %xmm2,32(%edx)
+.byte 102,15,58,15,202,8
+ movdqa %xmm0,%xmm2
+ psrlq $7,%xmm0
+ paddq %xmm1,%xmm6
+ movdqa %xmm2,%xmm1
+ psrlq $1,%xmm2
+ psllq $56,%xmm1
+ pxor %xmm2,%xmm0
+ psrlq $7,%xmm2
+ pxor %xmm1,%xmm0
+ psllq $7,%xmm1
+ pxor %xmm2,%xmm0
+ movdqa %xmm5,%xmm2
+ pxor %xmm1,%xmm0
+ movdqa %xmm5,%xmm1
+ psrlq $6,%xmm2
+ paddq %xmm0,%xmm6
+ movdqa %xmm5,%xmm0
+ psrlq $19,%xmm1
+ psllq $3,%xmm0
+ pxor %xmm1,%xmm2
+ psrlq $42,%xmm1
+ pxor %xmm0,%xmm2
+ psllq $42,%xmm0
+ pxor %xmm1,%xmm2
+ movdqa (%edx),%xmm1
+ pxor %xmm0,%xmm2
+ movdqa 96(%ebp),%xmm0
+ movq %mm4,%mm1
+ paddq %xmm2,%xmm6
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ paddq %xmm6,%xmm0
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm0,-32(%edx)
+ movdqa %xmm4,%xmm2
+ movdqa %xmm1,%xmm0
+.byte 102,15,58,15,207,8
+ movdqa %xmm3,48(%edx)
+.byte 102,15,58,15,211,8
+ movdqa %xmm1,%xmm3
+ psrlq $7,%xmm1
+ paddq %xmm2,%xmm7
+ movdqa %xmm3,%xmm2
+ psrlq $1,%xmm3
+ psllq $56,%xmm2
+ pxor %xmm3,%xmm1
+ psrlq $7,%xmm3
+ pxor %xmm2,%xmm1
+ psllq $7,%xmm2
+ pxor %xmm3,%xmm1
+ movdqa %xmm6,%xmm3
+ pxor %xmm2,%xmm1
+ movdqa %xmm6,%xmm2
+ psrlq $6,%xmm3
+ paddq %xmm1,%xmm7
+ movdqa %xmm6,%xmm1
+ psrlq $19,%xmm2
+ psllq $3,%xmm1
+ pxor %xmm2,%xmm3
+ psrlq $42,%xmm2
+ pxor %xmm1,%xmm3
+ psllq $42,%xmm1
+ pxor %xmm2,%xmm3
+ movdqa 16(%edx),%xmm2
+ pxor %xmm1,%xmm3
+ movdqa 112(%ebp),%xmm1
+ movq %mm4,%mm1
+ paddq %xmm3,%xmm7
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ paddq %xmm7,%xmm1
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 40(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 48(%esp),%mm6
+ movdqa %xmm1,-16(%edx)
+ leal 128(%ebp),%ebp
+ decl %ecx
+ jnz .L00800_47_ssse3
+ movdqa (%ebp),%xmm1
+ leal -640(%ebp),%ebp
+ movdqu (%ebx),%xmm0
+.byte 102,15,56,0,193
+ movdqa (%ebp),%xmm3
+ movdqa %xmm1,%xmm2
+ movdqu 16(%ebx),%xmm1
+ paddq %xmm0,%xmm3
+.byte 102,15,56,0,202
+ movq %mm4,%mm1
+ movq -128(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,32(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 56(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -120(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm3,-128(%edx)
+ movdqa 16(%ebp),%xmm4
+ movdqa %xmm2,%xmm3
+ movdqu 32(%ebx),%xmm2
+ paddq %xmm1,%xmm4
+.byte 102,15,56,0,211
+ movq %mm4,%mm1
+ movq -112(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -104(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm4,-112(%edx)
+ movdqa 32(%ebp),%xmm5
+ movdqa %xmm3,%xmm4
+ movdqu 48(%ebx),%xmm3
+ paddq %xmm2,%xmm5
+.byte 102,15,56,0,220
+ movq %mm4,%mm1
+ movq -96(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -88(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
-.align 16
-.L00516_79_sse2:
- movq %mm2,%mm1
- psrlq $1,%mm2
- movq %mm6,%mm7
- psrlq $6,%mm6
- movq %mm2,%mm3
- psrlq $6,%mm2
- movq %mm6,%mm5
- psrlq $13,%mm6
- pxor %mm2,%mm3
- psrlq $1,%mm2
- pxor %mm6,%mm5
- psrlq $42,%mm6
- pxor %mm2,%mm3
- movq 200(%esp),%mm2
- psllq $56,%mm1
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm5,-96(%edx)
+ movdqa 48(%ebp),%xmm6
+ movdqa %xmm4,%xmm5
+ movdqu 64(%ebx),%xmm4
+ paddq %xmm3,%xmm6
+.byte 102,15,56,0,229
+ movq %mm4,%mm1
+ movq -80(%edx),%mm7
pxor %mm6,%mm5
- psllq $3,%mm7
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
- paddq 128(%esp),%mm2
- psllq $7,%mm1
- pxor %mm7,%mm5
- psllq $42,%mm7
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
pxor %mm1,%mm3
- pxor %mm7,%mm5
- paddq %mm5,%mm3
- paddq %mm2,%mm3
- movq %mm3,72(%esp)
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -72(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
- movq 56(%esp),%mm7
+ movdqa %xmm6,-80(%edx)
+ movdqa 64(%ebp),%xmm7
+ movdqa %xmm5,%xmm6
+ movdqu 80(%ebx),%xmm5
+ paddq %xmm4,%xmm7
+.byte 102,15,56,0,238
movq %mm4,%mm1
- movq %mm4,%mm2
+ movq -64(%edx),%mm7
+ pxor %mm6,%mm5
psrlq $14,%mm1
movq %mm4,32(%esp)
- psllq $23,%mm2
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
movq %mm1,%mm3
psrlq $4,%mm1
- pxor %mm2,%mm3
- psllq $23,%mm2
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
pxor %mm1,%mm3
+ movq %mm0,(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
psrlq $23,%mm1
- pxor %mm2,%mm3
- psllq $4,%mm2
+ paddq 56(%esp),%mm7
pxor %mm1,%mm3
- paddq (%ebp),%mm7
- pxor %mm2,%mm3
- pxor %mm6,%mm5
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 24(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
movq 8(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 32(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 40(%esp),%mm6
+ movq %mm4,%mm1
+ movq -56(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,24(%esp)
pand %mm4,%mm5
- movq 16(%esp),%mm2
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
pxor %mm6,%mm5
- movq 24(%esp),%mm4
- paddq %mm5,%mm3
- movq %mm0,(%esp)
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,56(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 48(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 16(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq (%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 24(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 32(%esp),%mm6
+ movdqa %xmm7,-64(%edx)
+ movdqa %xmm0,(%edx)
+ movdqa 80(%ebp),%xmm0
+ movdqa %xmm6,%xmm7
+ movdqu 96(%ebx),%xmm6
+ paddq %xmm5,%xmm0
+.byte 102,15,56,0,247
+ movq %mm4,%mm1
+ movq -48(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,16(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,48(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 40(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 8(%esp),%mm4
paddq %mm7,%mm3
movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
movq %mm0,%mm6
- paddq 72(%esp),%mm3
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 56(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 16(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 24(%esp),%mm6
+ movq %mm4,%mm1
+ movq -40(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,8(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,40(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 32(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq (%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
psrlq $28,%mm5
paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
psllq $25,%mm6
+ movq 48(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 8(%esp),%mm5
+ paddq %mm6,%mm0
+ movq 16(%esp),%mm6
+ movdqa %xmm0,-48(%edx)
+ movdqa %xmm1,16(%edx)
+ movdqa 96(%ebp),%xmm1
+ movdqa %xmm7,%xmm0
+ movdqu 112(%ebx),%xmm7
+ paddq %xmm6,%xmm1
+.byte 102,15,56,0,248
+ movq %mm4,%mm1
+ movq -32(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,32(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 24(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 56(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 40(%esp),%mm1
psrlq $6,%mm5
pxor %mm6,%mm7
psllq $5,%mm6
pxor %mm5,%mm7
+ pxor %mm1,%mm0
psrlq $5,%mm5
pxor %mm6,%mm7
+ pand %mm0,%mm2
psllq $6,%mm6
pxor %mm5,%mm7
- subl $8,%esp
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq (%esp),%mm5
+ paddq %mm6,%mm2
+ movq 8(%esp),%mm6
+ movq %mm4,%mm1
+ movq -24(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,56(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,24(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 16(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 48(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 32(%esp),%mm1
+ psrlq $6,%mm5
pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
+ movq 56(%esp),%mm5
+ paddq %mm6,%mm0
+ movq (%esp),%mm6
+ movdqa %xmm1,-32(%edx)
+ movdqa %xmm2,32(%edx)
+ movdqa 112(%ebp),%xmm2
+ movdqa (%edx),%xmm0
+ paddq %xmm7,%xmm2
+ movq %mm4,%mm1
+ movq -16(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,48(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm0
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm0,16(%esp)
+ paddq %mm5,%mm7
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq 8(%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 40(%esp),%mm4
+ paddq %mm7,%mm3
movq %mm0,%mm5
- por %mm2,%mm0
- movq 88(%esp),%mm6
- pand %mm2,%mm5
- pand %mm1,%mm0
- movq 192(%esp),%mm2
- por %mm0,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm0,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 24(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm0,%mm2
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ pxor %mm7,%mm6
+ movq 48(%esp),%mm5
+ paddq %mm6,%mm2
+ movq 56(%esp),%mm6
+ movq %mm4,%mm1
+ movq -8(%edx),%mm7
+ pxor %mm6,%mm5
+ psrlq $14,%mm1
+ movq %mm4,40(%esp)
+ pand %mm4,%mm5
+ psllq $23,%mm4
+ paddq %mm3,%mm2
+ movq %mm1,%mm3
+ psrlq $4,%mm1
+ pxor %mm6,%mm5
+ pxor %mm4,%mm3
+ psllq $23,%mm4
+ pxor %mm1,%mm3
+ movq %mm2,8(%esp)
paddq %mm5,%mm7
- movq %mm3,%mm0
- movb (%ebp),%dl
- paddq %mm7,%mm0
- addl $8,%ebp
- cmpb $23,%dl
- jne .L00516_79_sse2
- movq 8(%esp),%mm1
- movq 16(%esp),%mm2
- movq 24(%esp),%mm3
+ pxor %mm4,%mm3
+ psrlq $23,%mm1
+ paddq (%esp),%mm7
+ pxor %mm1,%mm3
+ psllq $4,%mm4
+ pxor %mm4,%mm3
+ movq 32(%esp),%mm4
+ paddq %mm7,%mm3
+ movq %mm2,%mm5
+ psrlq $28,%mm5
+ paddq %mm3,%mm4
+ movq %mm2,%mm6
+ movq %mm5,%mm7
+ psllq $25,%mm6
+ movq 16(%esp),%mm1
+ psrlq $6,%mm5
+ pxor %mm6,%mm7
+ psllq $5,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm2
+ psrlq $5,%mm5
+ pxor %mm6,%mm7
+ pand %mm2,%mm0
+ psllq $6,%mm6
+ pxor %mm5,%mm7
+ pxor %mm1,%mm0
+ pxor %mm7,%mm6
movq 40(%esp),%mm5
+ paddq %mm6,%mm0
movq 48(%esp),%mm6
+ movdqa %xmm2,-16(%edx)
+ movq 8(%esp),%mm1
+ paddq %mm3,%mm0
+ movq 24(%esp),%mm3
movq 56(%esp),%mm7
+ pxor %mm1,%mm2
paddq (%esi),%mm0
paddq 8(%esi),%mm1
paddq 16(%esi),%mm2
@@ -286,12 +2280,10 @@ sha512_block_data_order:
movq %mm5,40(%esi)
movq %mm6,48(%esi)
movq %mm7,56(%esi)
- addl $640,%esp
- subl $640,%ebp
- cmpl 88(%esp),%edi
- jb .L003loop_sse2
+ cmpl %eax,%edi
+ jb .L007loop_ssse3
+ movl 76(%edx),%esp
emms
- movl 92(%esp),%esp
popl %edi
popl %esi
popl %ebx
@@ -402,7 +2394,7 @@ sha512_block_data_order:
movl $16,%ecx
.long 2784229001
.align 16
-.L00600_15_x86:
+.L00900_15_x86:
movl 40(%esp),%ecx
movl 44(%esp),%edx
movl %ecx,%esi
@@ -509,9 +2501,9 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $148,%dl
- jne .L00600_15_x86
+ jne .L00900_15_x86
.align 16
-.L00716_79_x86:
+.L01016_79_x86:
movl 312(%esp),%ecx
movl 316(%esp),%edx
movl %ecx,%esi
@@ -684,7 +2676,7 @@ sha512_block_data_order:
subl $8,%esp
leal 8(%ebp),%ebp
cmpb $23,%dl
- jne .L00716_79_x86
+ jne .L01016_79_x86
movl 840(%esp),%esi
movl 844(%esp),%edi
movl (%esi),%eax
@@ -827,10 +2819,12 @@ sha512_block_data_order:
.long 4234509866,1501505948
.long 987167468,1607167915
.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
.size sha512_block_data_order,.-.L_sha512_block_data_order_begin
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/vpaes-x86.s b/secure/lib/libcrypto/i386/vpaes-x86.s
index 7264297..e1dda0f 100644
--- a/secure/lib/libcrypto/i386/vpaes-x86.s
+++ b/secure/lib/libcrypto/i386/vpaes-x86.s
@@ -74,33 +74,33 @@ _vpaes_encrypt_core:
movdqa %xmm6,%xmm1
movdqa (%ebp),%xmm2
pandn %xmm0,%xmm1
- movdqu (%edx),%xmm5
- psrld $4,%xmm1
pand %xmm6,%xmm0
+ movdqu (%edx),%xmm5
.byte 102,15,56,0,208
movdqa 16(%ebp),%xmm0
-.byte 102,15,56,0,193
pxor %xmm5,%xmm2
- pxor %xmm2,%xmm0
+ psrld $4,%xmm1
addl $16,%edx
+.byte 102,15,56,0,193
leal 192(%ebp),%ebx
+ pxor %xmm2,%xmm0
jmp .L000enc_entry
.align 16
.L001enc_loop:
movdqa 32(%ebp),%xmm4
-.byte 102,15,56,0,226
- pxor %xmm5,%xmm4
movdqa 48(%ebp),%xmm0
+.byte 102,15,56,0,226
.byte 102,15,56,0,195
- pxor %xmm4,%xmm0
+ pxor %xmm5,%xmm4
movdqa 64(%ebp),%xmm5
-.byte 102,15,56,0,234
+ pxor %xmm4,%xmm0
movdqa -64(%ebx,%ecx,1),%xmm1
+.byte 102,15,56,0,234
movdqa 80(%ebp),%xmm2
-.byte 102,15,56,0,211
- pxor %xmm5,%xmm2
movdqa (%ebx,%ecx,1),%xmm4
+.byte 102,15,56,0,211
movdqa %xmm0,%xmm3
+ pxor %xmm5,%xmm2
.byte 102,15,56,0,193
addl $16,%edx
pxor %xmm2,%xmm0
@@ -109,28 +109,28 @@ _vpaes_encrypt_core:
pxor %xmm0,%xmm3
.byte 102,15,56,0,193
andl $48,%ecx
- pxor %xmm3,%xmm0
subl $1,%eax
+ pxor %xmm3,%xmm0
.L000enc_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm5
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm5
.byte 102,15,56,0,232
- pxor %xmm1,%xmm0
movdqa %xmm7,%xmm3
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,217
- pxor %xmm5,%xmm3
movdqa %xmm7,%xmm4
+ pxor %xmm5,%xmm3
.byte 102,15,56,0,224
- pxor %xmm5,%xmm4
movdqa %xmm7,%xmm2
+ pxor %xmm5,%xmm4
.byte 102,15,56,0,211
- pxor %xmm0,%xmm2
movdqa %xmm7,%xmm3
- movdqu (%edx),%xmm5
+ pxor %xmm0,%xmm2
.byte 102,15,56,0,220
+ movdqu (%edx),%xmm5
pxor %xmm1,%xmm3
jnz .L001enc_loop
movdqa 96(%ebp),%xmm4
@@ -146,8 +146,8 @@ _vpaes_encrypt_core:
.type _vpaes_decrypt_core,@function
.align 16
_vpaes_decrypt_core:
- movl 240(%edx),%eax
leal 608(%ebp),%ebx
+ movl 240(%edx),%eax
movdqa %xmm6,%xmm1
movdqa -64(%ebx),%xmm2
pandn %xmm0,%xmm1
@@ -170,56 +170,56 @@ _vpaes_decrypt_core:
.align 16
.L003dec_loop:
movdqa -32(%ebx),%xmm4
+ movdqa -16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa -16(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
- addl $16,%edx
-.byte 102,15,56,0,197
movdqa (%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 16(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 16(%ebx),%xmm0
-.byte 102,15,56,0,195
- pxor %xmm4,%xmm0
- subl $1,%eax
.byte 102,15,56,0,197
+.byte 102,15,56,0,203
+ pxor %xmm4,%xmm0
movdqa 32(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 48(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 48(%ebx),%xmm0
-.byte 102,15,56,0,195
- pxor %xmm4,%xmm0
.byte 102,15,56,0,197
+.byte 102,15,56,0,203
+ pxor %xmm4,%xmm0
movdqa 64(%ebx),%xmm4
+ pxor %xmm1,%xmm0
+ movdqa 80(%ebx),%xmm1
.byte 102,15,56,0,226
- pxor %xmm0,%xmm4
- movdqa 80(%ebx),%xmm0
-.byte 102,15,56,0,195
+.byte 102,15,56,0,197
+.byte 102,15,56,0,203
pxor %xmm4,%xmm0
+ addl $16,%edx
.byte 102,15,58,15,237,12
+ pxor %xmm1,%xmm0
+ subl $1,%eax
.L002dec_entry:
movdqa %xmm6,%xmm1
+ movdqa -32(%ebp),%xmm2
pandn %xmm0,%xmm1
- psrld $4,%xmm1
pand %xmm6,%xmm0
- movdqa -32(%ebp),%xmm2
+ psrld $4,%xmm1
.byte 102,15,56,0,208
- pxor %xmm1,%xmm0
movdqa %xmm7,%xmm3
+ pxor %xmm1,%xmm0
.byte 102,15,56,0,217
- pxor %xmm2,%xmm3
movdqa %xmm7,%xmm4
+ pxor %xmm2,%xmm3
.byte 102,15,56,0,224
pxor %xmm2,%xmm4
movdqa %xmm7,%xmm2
.byte 102,15,56,0,211
- pxor %xmm0,%xmm2
movdqa %xmm7,%xmm3
+ pxor %xmm0,%xmm2
.byte 102,15,56,0,220
- pxor %xmm1,%xmm3
movdqu (%edx),%xmm0
+ pxor %xmm1,%xmm3
jnz .L003dec_loop
movdqa 96(%ebx),%xmm4
.byte 102,15,56,0,226
@@ -328,12 +328,12 @@ _vpaes_schedule_core:
.type _vpaes_schedule_192_smear,@function
.align 16
_vpaes_schedule_192_smear:
- pshufd $128,%xmm6,%xmm0
- pxor %xmm0,%xmm6
+ pshufd $128,%xmm6,%xmm1
pshufd $254,%xmm7,%xmm0
+ pxor %xmm1,%xmm6
+ pxor %xmm1,%xmm1
pxor %xmm0,%xmm6
movdqa %xmm6,%xmm0
- pxor %xmm1,%xmm1
movhlps %xmm1,%xmm6
ret
.size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
diff --git a/secure/lib/libcrypto/i386/wp-mmx.s b/secure/lib/libcrypto/i386/wp-mmx.s
index c0a42fd..6ceae96 100644
--- a/secure/lib/libcrypto/i386/wp-mmx.s
+++ b/secure/lib/libcrypto/i386/wp-mmx.s
@@ -67,228 +67,230 @@ whirlpool_block_mmx:
movq 4096(%ebp,%esi,8),%mm0
movl (%esp),%eax
movl 4(%esp),%ebx
- movb %al,%cl
- movb %ah,%dl
+ movzbl %al,%ecx
+ movzbl %ah,%edx
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
movq 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 8(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
movq 6(%ebp,%esi,8),%mm2
movq 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
movq 4(%ebp,%esi,8),%mm4
movq 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 12(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
movq 2(%ebp,%esi,8),%mm6
movq 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 16(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 20(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 24(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 28(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 32(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 36(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 40(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 44(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 48(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 52(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 56(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 60(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
movl 64(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
movl 68(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
movq %mm0,(%esp)
@@ -299,226 +301,226 @@ whirlpool_block_mmx:
movq %mm5,40(%esp)
movq %mm6,48(%esp)
movq %mm7,56(%esp)
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm0
pxor 7(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
movl 72(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm2
pxor 5(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm4
pxor 3(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
movl 76(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm6
pxor 1(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm1
pxor 7(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
movl 80(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm3
pxor 5(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm5
pxor 3(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
movl 84(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm7
pxor 1(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm2
pxor 7(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
movl 88(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm4
pxor 5(%ebp,%edi,8),%mm5
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm6
pxor 3(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
movl 92(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm0
pxor 1(%ebp,%edi,8),%mm1
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm3
pxor 7(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
movl 96(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm5
pxor 5(%ebp,%edi,8),%mm6
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm7
pxor 3(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
movl 100(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm1
pxor 1(%ebp,%edi,8),%mm2
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm4
pxor 7(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
movl 104(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm6
pxor 5(%ebp,%edi,8),%mm7
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm0
pxor 3(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
movl 108(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm2
pxor 1(%ebp,%edi,8),%mm3
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm5
pxor 7(%ebp,%edi,8),%mm6
- movb %al,%cl
- movb %ah,%dl
movl 112(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm7
pxor 5(%ebp,%edi,8),%mm0
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm1
pxor 3(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
movl 116(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm3
pxor 1(%ebp,%edi,8),%mm4
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm6
pxor 7(%ebp,%edi,8),%mm7
- movb %al,%cl
- movb %ah,%dl
movl 120(%esp),%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm0
pxor 5(%ebp,%edi,8),%mm1
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm2
pxor 3(%ebp,%edi,8),%mm3
- movb %bl,%cl
- movb %bh,%dl
movl 124(%esp),%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm4
pxor 1(%ebp,%edi,8),%mm5
- movb %al,%cl
- movb %ah,%dl
+ shrl $16,%eax
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%eax
+ movzbl %ah,%edx
pxor (%ebp,%esi,8),%mm7
pxor 7(%ebp,%edi,8),%mm0
- movb %al,%cl
- movb %ah,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %bh,%edx
pxor 6(%ebp,%esi,8),%mm1
pxor 5(%ebp,%edi,8),%mm2
- movb %bl,%cl
- movb %bh,%dl
+ shrl $16,%ebx
leal (%ecx,%ecx,1),%esi
+ movzbl %bl,%ecx
leal (%edx,%edx,1),%edi
- shrl $16,%ebx
+ movzbl %bh,%edx
pxor 4(%ebp,%esi,8),%mm3
pxor 3(%ebp,%edi,8),%mm4
- movb %bl,%cl
- movb %bh,%dl
leal (%ecx,%ecx,1),%esi
+ movzbl %al,%ecx
leal (%edx,%edx,1),%edi
+ movzbl %ah,%edx
pxor 2(%ebp,%esi,8),%mm5
pxor 1(%ebp,%edi,8),%mm6
leal 128(%esp),%ebx
diff --git a/secure/lib/libcrypto/i386/x86-gf2m.s b/secure/lib/libcrypto/i386/x86-gf2m.s
index 1fcfe38..b7177ed 100644
--- a/secure/lib/libcrypto/i386/x86-gf2m.s
+++ b/secure/lib/libcrypto/i386/x86-gf2m.s
@@ -341,4 +341,4 @@ bn_GF2m_mul_2x2:
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/x86-mont.s b/secure/lib/libcrypto/i386/x86-mont.s
index 0eec75d..26f84f8 100644
--- a/secure/lib/libcrypto/i386/x86-mont.s
+++ b/secure/lib/libcrypto/i386/x86-mont.s
@@ -454,4 +454,4 @@ bn_mul_mont:
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
-.comm OPENSSL_ia32cap_P,8,4
+.comm OPENSSL_ia32cap_P,16,4
diff --git a/secure/lib/libcrypto/i386/x86cpuid.s b/secure/lib/libcrypto/i386/x86cpuid.s
index 815ff69..69622ec 100644
--- a/secure/lib/libcrypto/i386/x86cpuid.s
+++ b/secure/lib/libcrypto/i386/x86cpuid.s
@@ -23,6 +23,8 @@ OPENSSL_ia32_cpuid:
xorl %eax,%eax
btl $21,%ecx
jnc .L000nocpuid
+ movl 20(%esp),%esi
+ movl %eax,8(%esi)
.byte 0x0f,0xa2
movl %eax,%edi
xorl %eax,%eax
@@ -73,28 +75,36 @@ OPENSSL_ia32_cpuid:
andl $4026531839,%edx
jmp .L002generic
.L001intel:
+ cmpl $7,%edi
+ jb .L003cacheinfo
+ movl 20(%esp),%esi
+ movl $7,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,8(%esi)
+.L003cacheinfo:
cmpl $4,%edi
movl $-1,%edi
- jb .L003nocacheinfo
+ jb .L004nocacheinfo
movl $4,%eax
movl $0,%ecx
.byte 0x0f,0xa2
movl %eax,%edi
shrl $14,%edi
andl $4095,%edi
-.L003nocacheinfo:
+.L004nocacheinfo:
movl $1,%eax
xorl %ecx,%ecx
.byte 0x0f,0xa2
andl $3220176895,%edx
cmpl $0,%ebp
- jne .L004notintel
+ jne .L005notintel
orl $1073741824,%edx
andb $15,%ah
cmpb $15,%ah
- jne .L004notintel
+ jne .L005notintel
orl $1048576,%edx
-.L004notintel:
+.L005notintel:
btl $28,%edx
jnc .L002generic
andl $4026531839,%edx
@@ -111,20 +121,22 @@ OPENSSL_ia32_cpuid:
movl %edx,%esi
orl %ecx,%ebp
btl $27,%ecx
- jnc .L005clear_avx
+ jnc .L006clear_avx
xorl %ecx,%ecx
.byte 15,1,208
andl $6,%eax
cmpl $6,%eax
- je .L006done
+ je .L007done
cmpl $2,%eax
- je .L005clear_avx
-.L007clear_xmm:
+ je .L006clear_avx
+.L008clear_xmm:
andl $4261412861,%ebp
andl $4278190079,%esi
-.L005clear_avx:
+.L006clear_avx:
andl $4026525695,%ebp
-.L006done:
+ movl 20(%esp),%edi
+ andl $4294967263,8(%edi)
+.L007done:
movl %esi,%eax
movl %ebp,%edx
.L000nocpuid:
@@ -143,9 +155,9 @@ OPENSSL_rdtsc:
xorl %edx,%edx
leal OPENSSL_ia32cap_P,%ecx
btl $4,(%ecx)
- jnc .L008notsc
+ jnc .L009notsc
.byte 0x0f,0x31
-.L008notsc:
+.L009notsc:
ret
.size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin
.globl OPENSSL_instrument_halt
@@ -155,14 +167,14 @@ OPENSSL_instrument_halt:
.L_OPENSSL_instrument_halt_begin:
leal OPENSSL_ia32cap_P,%ecx
btl $4,(%ecx)
- jnc .L009nohalt
+ jnc .L010nohalt
.long 2421723150
andl $3,%eax
- jnz .L009nohalt
+ jnz .L010nohalt
pushfl
popl %eax
btl $9,%eax
- jnc .L009nohalt
+ jnc .L010nohalt
.byte 0x0f,0x31
pushl %edx
pushl %eax
@@ -172,7 +184,7 @@ OPENSSL_instrument_halt:
sbbl 4(%esp),%edx
addl $8,%esp
ret
-.L009nohalt:
+.L010nohalt:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -185,21 +197,21 @@ OPENSSL_far_spin:
pushfl
popl %eax
btl $9,%eax
- jnc .L010nospin
+ jnc .L011nospin
movl 4(%esp),%eax
movl 8(%esp),%ecx
.long 2430111262
xorl %eax,%eax
movl (%ecx),%edx
- jmp .L011spin
+ jmp .L012spin
.align 16
-.L011spin:
+.L012spin:
incl %eax
cmpl (%ecx),%edx
- je .L011spin
+ je .L012spin
.long 529567888
ret
-.L010nospin:
+.L011nospin:
xorl %eax,%eax
xorl %edx,%edx
ret
@@ -214,10 +226,10 @@ OPENSSL_wipe_cpu:
leal OPENSSL_ia32cap_P,%ecx
movl (%ecx),%ecx
btl $1,(%ecx)
- jnc .L012no_x87
+ jnc .L013no_x87
andl $83886080,%ecx
cmpl $83886080,%ecx
- jne .L013no_sse2
+ jne .L014no_sse2
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
@@ -226,9 +238,9 @@ OPENSSL_wipe_cpu:
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
-.L013no_sse2:
+.L014no_sse2:
.long 4007259865,4007259865,4007259865,4007259865,2430851995
-.L012no_x87:
+.L013no_x87:
leal 4(%esp),%eax
ret
.size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin
@@ -242,11 +254,11 @@ OPENSSL_atomic_add:
pushl %ebx
nop
movl (%edx),%eax
-.L014spin:
+.L015spin:
leal (%eax,%ecx,1),%ebx
nop
.long 447811568
- jne .L014spin
+ jne .L015spin
movl %ebx,%eax
popl %ebx
ret
@@ -287,32 +299,32 @@ OPENSSL_cleanse:
movl 8(%esp),%ecx
xorl %eax,%eax
cmpl $7,%ecx
- jae .L015lot
+ jae .L016lot
cmpl $0,%ecx
- je .L016ret
-.L017little:
+ je .L017ret
+.L018little:
movb %al,(%edx)
subl $1,%ecx
leal 1(%edx),%edx
- jnz .L017little
-.L016ret:
+ jnz .L018little
+.L017ret:
ret
.align 16
-.L015lot:
+.L016lot:
testl $3,%edx
- jz .L018aligned
+ jz .L019aligned
movb %al,(%edx)
leal -1(%ecx),%ecx
leal 1(%edx),%edx
- jmp .L015lot
-.L018aligned:
+ jmp .L016lot
+.L019aligned:
movl %eax,(%edx)
leal -4(%ecx),%ecx
testl $-4,%ecx
leal 4(%edx),%edx
- jnz .L018aligned
+ jnz .L019aligned
cmpl $0,%ecx
- jne .L017little
+ jne .L018little
ret
.size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
.globl OPENSSL_ia32_rdrand
@@ -321,15 +333,32 @@ OPENSSL_cleanse:
OPENSSL_ia32_rdrand:
.L_OPENSSL_ia32_rdrand_begin:
movl $8,%ecx
-.L019loop:
+.L020loop:
.byte 15,199,240
- jc .L020break
- loop .L019loop
-.L020break:
+ jc .L021break
+ loop .L020loop
+.L021break:
cmpl $0,%eax
cmovel %ecx,%eax
ret
.size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin
-.comm OPENSSL_ia32cap_P,8,4
+.globl OPENSSL_ia32_rdseed
+.type OPENSSL_ia32_rdseed,@function
+.align 16
+OPENSSL_ia32_rdseed:
+.L_OPENSSL_ia32_rdseed_begin:
+ movl $8,%ecx
+.L022loop:
+.byte 15,199,248
+ jc .L023break
+ loop .L022loop
+.L023break:
+ cmpl $0,%eax
+ cmovel %ecx,%eax
+ ret
+.size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin
+.hidden OPENSSL_cpuid_setup
+.hidden OPENSSL_ia32cap_P
+.comm OPENSSL_ia32cap_P,16,4
.section .init
call OPENSSL_cpuid_setup
OpenPOWER on IntegriCloud