diff options
Diffstat (limited to 'secure/lib/libcrypto/i386')
-rw-r--r-- | secure/lib/libcrypto/i386/aes-586.s | 480 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/aesni-x86.s | 1159 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/bn-586.s | 2 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/cast-586.s | 934 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/des-586.s | 5 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/ghash-x86.s | 226 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/rc4-586.s | 4 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/sha1-586.s | 1335 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/sha256-586.s | 4551 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/sha512-586.s | 2246 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/vpaes-x86.s | 88 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/wp-mmx.s | 322 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/x86-gf2m.s | 2 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/x86-mont.s | 2 | ||||
-rw-r--r-- | secure/lib/libcrypto/i386/x86cpuid.s | 117 |
15 files changed, 8675 insertions, 2798 deletions
diff --git a/secure/lib/libcrypto/i386/aes-586.s b/secure/lib/libcrypto/i386/aes-586.s index 704c53c..bb66276 100644 --- a/secure/lib/libcrypto/i386/aes-586.s +++ b/secure/lib/libcrypto/i386/aes-586.s @@ -101,74 +101,78 @@ _x86_AES_encrypt_compact: xorl %ecx,%edx movl %esi,%ecx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ecx,%edi xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%ecx - rorl $16,%ebp - xorl %ebp,%ecx - rorl $8,%ebp - xorl %ebp,%ecx - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %edx,%edi xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%edx - rorl $16,%ebp - xorl %ebp,%edx - rorl $8,%ebp - xorl %ebp,%edx - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %eax,%edi xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp xorl %esi,%eax - rorl $16,%ebp - xorl %ebp,%eax - rorl $8,%ebp - xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%edi - subl %ebp,%esi + movl %ebp,%esi + shrl $7,%ebp andl $4278124286,%edi - andl $454761243,%esi + subl %ebp,%esi movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp xorl %edi,%esi + movl %ebx,%edi xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi roll $24,%ebx + xorl %edi,%esi xorl %esi,%ebx - rorl $16,%ebp - xorl %ebp,%ebx - rorl $8,%ebp - xorl %ebp,%ebx movl 20(%esp),%edi movl 28(%esp),%ebp addl $16,%edi @@ -290,74 +294,76 @@ _sse_AES_encrypt_compact: pshufw $13,%mm4,%mm5 movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx - pshufw $13,%mm0,%mm2 movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx - shll $8,%edx shrl $16,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi - orl %esi,%ecx pshufw $8,%mm4,%mm6 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $8,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $24,%esi orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi movd %mm2,%eax - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi - orl %esi,%ecx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx movd %mm6,%ebx - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm1 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%ecx shrl $16,%ebx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi - shll $16,%esi orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx punpckldq %mm1,%mm0 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi - orl %esi,%ecx - andl $255,%eax + andl $255,%ebx movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi orl %eax,%edx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm4 - andl $255,%ebx movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 movd %edx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -1130,28 +1136,28 @@ _x86_AES_decrypt_compact: movzbl -128(%ebp,%eax,1),%eax shll $24,%eax xorl %eax,%edx - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%eax subl %edi,%esi andl $4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %ecx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ebp subl %edi,%esi @@ -1162,39 +1168,39 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %eax,%ecx xorl %ebp,%eax - roll $24,%eax xorl %ebx,%ecx xorl %ebp,%ebx - roll $16,%ebx + roll $24,%eax xorl %ebp,%ecx - roll $8,%ebp + roll $16,%ebx xorl %eax,%ecx + roll $8,%ebp xorl %ebx,%ecx movl 4(%esp),%eax xorl %ebp,%ecx movl %ecx,12(%esp) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebx subl %edi,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %edx,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%ebp subl %edi,%esi @@ -1205,39 +1211,39 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ebx,%edx xorl %ebp,%ebx - roll $24,%ebx xorl %ecx,%edx xorl %ebp,%ecx - roll $16,%ecx + roll $24,%ebx xorl %ebp,%edx - roll $8,%ebp + roll $16,%ecx xorl %ebx,%edx + roll $8,%ebp xorl %ecx,%edx movl 8(%esp),%ebx xorl %ebp,%edx movl %edx,16(%esp) - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi shrl $7,%edi leal (%eax,%eax,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %eax,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1248,37 +1254,37 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ecx,%eax xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%eax xorl %ebp,%edx - roll $16,%edx + roll $24,%ecx xorl %ebp,%eax - roll $8,%ebp + roll $16,%edx xorl %ecx,%eax + roll $8,%ebp xorl %edx,%eax xorl %ebp,%eax - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%edi + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi shrl $7,%edi leal (%ebx,%ebx,1),%ecx subl %edi,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi shrl $7,%edi leal (%ecx,%ecx,1),%edx subl %edi,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%edi + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi shrl $7,%edi leal (%edx,%edx,1),%ebp subl %edi,%esi @@ -1289,13 +1295,13 @@ _x86_AES_decrypt_compact: xorl %esi,%ebp xorl %ecx,%ebx xorl %ebp,%ecx - roll $24,%ecx xorl %edx,%ebx xorl %ebp,%edx - roll $16,%edx + roll $24,%ecx xorl %ebp,%ebx - roll $8,%ebp + roll $16,%edx xorl %ecx,%ebx + roll $8,%ebp xorl %edx,%ebx movl 12(%esp),%ecx xorl %ebp,%ebx @@ -1414,77 +1420,79 @@ _sse_AES_decrypt_compact: .align 16 .L007loop: pshufw $12,%mm0,%mm1 - movd %mm1,%eax pshufw $9,%mm4,%mm5 - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%ecx + movd %mm1,%eax movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax shll $8,%edx - pshufw $6,%mm0,%mm2 - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $16,%esi + pshufw $3,%mm4,%mm6 orl %esi,%ecx - shrl $16,%eax - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $24,%esi - orl %esi,%edx shrl $16,%ebx - pshufw $3,%mm4,%mm6 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%esi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shll $24,%esi orl %esi,%ecx - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi shll $8,%esi - orl %esi,%ecx - movd %ecx,%mm0 - movzbl %al,%esi movd %mm2,%eax - movzbl -128(%ebp,%esi,1),%ecx - shll $16,%ecx - movzbl %bl,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi movd %mm6,%ebx - movzbl -128(%ebp,%esi,1),%esi + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi orl %esi,%ecx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi orl %esi,%edx - movzbl %bl,%esi - movzbl -128(%ebp,%esi,1),%esi + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi shll $16,%esi - orl %esi,%edx - movd %edx,%mm1 - movzbl %ah,%esi - movzbl -128(%ebp,%esi,1),%edx - shll $8,%edx - movzbl %bh,%esi shrl $16,%eax - movzbl -128(%ebp,%esi,1),%esi - shll $24,%esi orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi shrl $16,%ebx - punpckldq %mm1,%mm0 - movzbl %bh,%esi - movzbl -128(%ebp,%esi,1),%esi shll $8,%esi - orl %esi,%ecx + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi orl %ebx,%edx - movzbl %al,%esi - movzbl -128(%ebp,%esi,1),%esi shll $16,%esi - orl %esi,%edx - movd %edx,%mm4 - movzbl %ah,%eax movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx shll $24,%eax orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 movd %ecx,%mm5 punpckldq %mm5,%mm4 addl $16,%edi @@ -3046,30 +3054,30 @@ private_AES_set_decrypt_key: .align 4 .L056permute: addl $16,%edi - movl %eax,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi xorl %eax,%ebx - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp xorl %eax,%ecx subl %ebp,%esi andl $4278124286,%edx @@ -3090,30 +3098,30 @@ private_AES_set_decrypt_key: movl %ebp,%ebx xorl %edx,%eax movl %eax,(%edi) - movl %ebx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ecx andl $454761243,%esi - xorl %ecx,%esi - movl %esi,%ecx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi xorl %ebx,%ecx - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp xorl %ebx,%edx subl %ebp,%esi andl $4278124286,%eax @@ -3134,30 +3142,30 @@ private_AES_set_decrypt_key: movl %ebp,%ecx xorl %eax,%ebx movl %ebx,4(%edi) - movl %ecx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %ecx,%ebp leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%edx andl $454761243,%esi - xorl %edx,%esi - movl %esi,%edx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi xorl %ecx,%edx - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp xorl %ecx,%eax subl %ebp,%esi andl $4278124286,%ebx @@ -3178,30 +3186,30 @@ private_AES_set_decrypt_key: movl %ebp,%edx xorl %ebx,%ecx movl %ecx,8(%edi) - movl %edx,%esi - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + movl $2155905152,%ebp + andl %edx,%ebp leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%eax andl $454761243,%esi - xorl %eax,%esi - movl %esi,%eax - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp subl %ebp,%esi andl $4278124286,%ebx andl $454761243,%esi xorl %edx,%eax - xorl %ebx,%esi - movl %esi,%ebx - andl $2155905152,%esi - movl %esi,%ebp - shrl $7,%ebp + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp xorl %edx,%ebx subl %ebp,%esi andl $4278124286,%ecx @@ -3234,4 +3242,4 @@ private_AES_set_decrypt_key: .byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 .byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/aesni-x86.s b/secure/lib/libcrypto/i386/aesni-x86.s index e05096f..5b294bd 100644 --- a/secure/lib/libcrypto/i386/aesni-x86.s +++ b/secure/lib/libcrypto/i386/aesni-x86.s @@ -22,7 +22,10 @@ aesni_encrypt: leal 16(%edx),%edx jnz .L000enc1_loop_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_encrypt,.-.L_aesni_encrypt_begin .globl aesni_decrypt @@ -46,32 +49,90 @@ aesni_decrypt: leal 16(%edx),%edx jnz .L001dec1_loop_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 .type _aesni_encrypt3,@function .align 16 _aesni_encrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -.L002enc3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 - movups (%edx),%xmm0 - jnz .L002enc3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -84,25 +145,26 @@ _aesni_encrypt3: .align 16 _aesni_decrypt3: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 - movups (%edx),%xmm0 -.L003dec3_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 - movups (%edx),%xmm0 - jnz .L003dec3_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -116,27 +178,29 @@ _aesni_decrypt3: _aesni_encrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 - movups (%edx),%xmm0 -.L004enc4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 .byte 102,15,56,220,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 - movups (%edx),%xmm0 - jnz .L004enc4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -152,27 +216,29 @@ _aesni_encrypt4: _aesni_decrypt4: movups (%edx),%xmm0 movups 16(%edx),%xmm1 - shrl $1,%ecx - leal 32(%edx),%edx + shll $4,%ecx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 pxor %xmm0,%xmm4 pxor %xmm0,%xmm5 - movups (%edx),%xmm0 -.L005dec4_loop: + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 .byte 102,15,56,222,233 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 - movups (%edx),%xmm0 - jnz .L005dec4_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -187,45 +253,42 @@ _aesni_decrypt4: .align 16 _aesni_encrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 pxor %xmm0,%xmm7 -.byte 102,15,56,220,241 - movups (%edx),%xmm0 -.byte 102,15,56,220,249 - jmp .L_aesni_encrypt6_enter + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L008_aesni_encrypt6_inner .align 16 -.L006enc6_loop: +.L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 - decl %ecx .byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.align 16 .L_aesni_encrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 .byte 102,15,56,220,216 - leal 32(%edx),%edx .byte 102,15,56,220,224 .byte 102,15,56,220,232 .byte 102,15,56,220,240 .byte 102,15,56,220,248 - movups (%edx),%xmm0 - jnz .L006enc6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -244,45 +307,42 @@ _aesni_encrypt6: .align 16 _aesni_decrypt6: movups (%edx),%xmm0 - shrl $1,%ecx + shll $4,%ecx movups 16(%edx),%xmm1 - leal 32(%edx),%edx xorps %xmm0,%xmm2 pxor %xmm0,%xmm3 -.byte 102,15,56,222,209 pxor %xmm0,%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor %xmm0,%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor %xmm0,%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 pxor %xmm0,%xmm7 -.byte 102,15,56,222,241 - movups (%edx),%xmm0 -.byte 102,15,56,222,249 - jmp .L_aesni_decrypt6_enter + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L010_aesni_decrypt6_inner .align 16 -.L007dec6_loop: +.L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 - decl %ecx .byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.align 16 .L_aesni_decrypt6_enter: - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,222,208 .byte 102,15,56,222,216 - leal 32(%edx),%edx .byte 102,15,56,222,224 .byte 102,15,56,222,232 .byte 102,15,56,222,240 .byte 102,15,56,222,248 - movups (%edx),%xmm0 - jnz .L007dec6_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -312,14 +372,14 @@ aesni_ecb_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz .L008ecb_ret + jz .L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz .L009ecb_decrypt + jz .L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L010ecb_enc_tail + jb .L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -328,9 +388,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L011ecb_enc_loop6_enter + jmp .L015ecb_enc_loop6_enter .align 16 -.L012ecb_enc_loop6: +.L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -345,12 +405,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L011ecb_enc_loop6_enter: +.L015ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L012ecb_enc_loop6 + jnc .L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -359,18 +419,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L008ecb_ret -.L010ecb_enc_tail: + jz .L012ecb_ret +.L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L013ecb_enc_one + jb .L017ecb_enc_one movups 16(%esi),%xmm3 - je .L014ecb_enc_two + je .L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L015ecb_enc_three + jb .L019ecb_enc_three movups 48(%esi),%xmm5 - je .L016ecb_enc_four + je .L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 @@ -379,50 +439,49 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L013ecb_enc_one: +.L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L017enc1_loop_3: +.L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L017enc1_loop_3 + jnz .L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L014ecb_enc_two: - xorps %xmm4,%xmm4 - call _aesni_encrypt3 +.L018ecb_enc_two: + call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L015ecb_enc_three: +.L019ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L016ecb_enc_four: +.L020ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L009ecb_decrypt: +.L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L018ecb_dec_tail + jb .L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -431,9 +490,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L019ecb_dec_loop6_enter + jmp .L023ecb_dec_loop6_enter .align 16 -.L020ecb_dec_loop6: +.L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -448,12 +507,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L019ecb_dec_loop6_enter: +.L023ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L020ecb_dec_loop6 + jnc .L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -462,18 +521,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L008ecb_ret -.L018ecb_dec_tail: + jz .L012ecb_ret +.L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L021ecb_dec_one + jb .L025ecb_dec_one movups 16(%esi),%xmm3 - je .L022ecb_dec_two + je .L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L023ecb_dec_three + jb .L027ecb_dec_three movups 48(%esi),%xmm5 - je .L024ecb_dec_four + je .L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 @@ -482,44 +541,51 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L021ecb_dec_one: +.L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L025dec1_loop_4: +.L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L025dec1_loop_4 + jnz .L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L022ecb_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 +.L026ecb_dec_two: + call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L023ecb_dec_three: +.L027ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L008ecb_ret + jmp .L012ecb_ret .align 16 -.L024ecb_dec_four: +.L028ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L008ecb_ret: +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -558,48 +624,56 @@ aesni_ccm64_encrypt_blocks: movl %ebp,20(%esp) movl %ebp,24(%esp) movl %ebp,28(%esp) - shrl $1,%ecx + shll $4,%ecx + movl $16,%ebx leal (%edx),%ebp movdqa (%esp),%xmm5 movdqa %xmm7,%xmm2 - movl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx .byte 102,15,56,0,253 -.L026ccm64_enc_outer: +.L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 xorps %xmm0,%xmm2 movups 16(%ebp),%xmm1 xorps %xmm6,%xmm0 - leal 32(%ebp),%edx xorps %xmm0,%xmm3 - movups (%edx),%xmm0 -.L027ccm64_enc2_loop: + movups 32(%ebp),%xmm0 +.L031ccm64_enc2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz .L027ccm64_enc2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 + decl %eax .byte 102,15,56,221,208 .byte 102,15,56,221,216 - decl %eax leal 16(%esi),%esi xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) - leal 16(%edi),%edi .byte 102,15,56,0,213 - jnz .L026ccm64_enc_outer + leal 16(%edi),%edi + jnz .L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -647,71 +721,82 @@ aesni_ccm64_decrypt_blocks: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L028enc1_loop_5: +.L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L028enc1_loop_5 + jnz .L032enc1_loop_5 .byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx movups (%esi),%xmm6 paddq 16(%esp),%xmm7 leal 16(%esi),%esi - jmp .L029ccm64_dec_outer + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L033ccm64_dec_outer .align 16 -.L029ccm64_dec_outer: +.L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 - movl %ebx,%ecx movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz .L030ccm64_dec_break + jz .L034ccm64_dec_break movups (%ebp),%xmm0 - shrl $1,%ecx + movl %ebx,%ecx movups 16(%ebp),%xmm1 xorps %xmm0,%xmm6 - leal 32(%ebp),%edx xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 - movups (%edx),%xmm0 -.L031ccm64_dec2_loop: + movups 32(%ebp),%xmm0 +.L035ccm64_dec2_loop: .byte 102,15,56,220,209 - decl %ecx .byte 102,15,56,220,217 - movups 16(%edx),%xmm1 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx .byte 102,15,56,220,208 - leal 32(%edx),%edx .byte 102,15,56,220,216 - movups (%edx),%xmm0 - jnz .L031ccm64_dec2_loop + movups -16(%edx,%ecx,1),%xmm0 + jnz .L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 .byte 102,15,56,220,217 - leal 16(%esi),%esi .byte 102,15,56,221,208 .byte 102,15,56,221,216 - jmp .L029ccm64_dec_outer + leal 16(%esi),%esi + jmp .L033ccm64_dec_outer .align 16 -.L030ccm64_dec_break: +.L034ccm64_dec_break: + movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 movups 16(%edx),%xmm1 xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -.L032enc1_loop_6: +.L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L032enc1_loop_6 + jnz .L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -737,7 +822,7 @@ aesni_ctr32_encrypt_blocks: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je .L033ctr32_one_shortcut + je .L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -753,63 +838,59 @@ aesni_ctr32_encrypt_blocks: .byte 102,15,58,34,253,3 movl 240(%edx),%ecx bswap %ebx - pxor %xmm1,%xmm1 pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movdqa (%esp),%xmm2 -.byte 102,15,58,34,203,0 +.byte 102,15,58,34,195,0 leal 3(%ebx),%ebp -.byte 102,15,58,34,197,0 +.byte 102,15,58,34,205,0 incl %ebx -.byte 102,15,58,34,203,1 +.byte 102,15,58,34,195,1 incl %ebp -.byte 102,15,58,34,197,1 +.byte 102,15,58,34,205,1 incl %ebx -.byte 102,15,58,34,203,2 +.byte 102,15,58,34,195,2 incl %ebp -.byte 102,15,58,34,197,2 - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 - movdqa %xmm0,64(%esp) +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) .byte 102,15,56,0,194 - pshufd $192,%xmm1,%xmm2 - pshufd $128,%xmm1,%xmm3 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb .L034ctr32_tail + jb .L038ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx movdqa %xmm7,32(%esp) - shrl $1,%ecx movl %edx,%ebp - movl %ecx,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp .L035ctr32_loop6 -.align 16 -.L035ctr32_loop6: - pshufd $64,%xmm1,%xmm4 - movdqa 32(%esp),%xmm1 - pshufd $192,%xmm0,%xmm5 - por %xmm1,%xmm2 - pshufd $128,%xmm0,%xmm6 - por %xmm1,%xmm3 - pshufd $64,%xmm0,%xmm7 - por %xmm1,%xmm4 - por %xmm1,%xmm5 - por %xmm1,%xmm6 - por %xmm1,%xmm7 - movups (%ebp),%xmm0 - movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx - decl %ecx + jmp .L039ctr32_loop6 +.align 16 +.L039ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 pxor %xmm0,%xmm3 -.byte 102,15,56,220,209 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 pxor %xmm0,%xmm4 -.byte 102,15,56,220,217 pxor %xmm0,%xmm5 -.byte 102,15,56,220,225 +.byte 102,15,56,220,209 pxor %xmm0,%xmm6 -.byte 102,15,56,220,233 pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movups (%esi),%xmm1 @@ -820,51 +901,51 @@ aesni_ctr32_encrypt_blocks: movups %xmm2,(%edi) movdqa 16(%esp),%xmm0 xorps %xmm1,%xmm4 - movdqa 48(%esp),%xmm1 + movdqa 64(%esp),%xmm1 movups %xmm3,16(%edi) movups %xmm4,32(%edi) paddd %xmm0,%xmm1 - paddd 64(%esp),%xmm0 + paddd 48(%esp),%xmm0 movdqa (%esp),%xmm2 movups 48(%esi),%xmm3 movups 64(%esi),%xmm4 xorps %xmm3,%xmm5 movups 80(%esi),%xmm3 leal 96(%esi),%esi - movdqa %xmm1,48(%esp) -.byte 102,15,56,0,202 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 xorps %xmm4,%xmm6 movups %xmm5,48(%edi) xorps %xmm3,%xmm7 - movdqa %xmm0,64(%esp) -.byte 102,15,56,0,194 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 movups %xmm6,64(%edi) - pshufd $192,%xmm1,%xmm2 + pshufd $192,%xmm0,%xmm2 movups %xmm7,80(%edi) leal 96(%edi),%edi - movl %ebx,%ecx - pshufd $128,%xmm1,%xmm3 + pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc .L035ctr32_loop6 + jnc .L039ctr32_loop6 addl $6,%eax - jz .L036ctr32_ret + jz .L040ctr32_ret + movdqu (%ebp),%xmm7 movl %ebp,%edx - leal 1(,%ecx,2),%ecx - movdqa 32(%esp),%xmm7 -.L034ctr32_tail: + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb .L037ctr32_one - pshufd $64,%xmm1,%xmm4 + jb .L041ctr32_one + pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je .L038ctr32_two - pshufd $192,%xmm0,%xmm5 + je .L042ctr32_two + pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb .L039ctr32_three - pshufd $128,%xmm0,%xmm6 + jb .L043ctr32_three + pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je .L040ctr32_four + je .L044ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 @@ -882,39 +963,39 @@ aesni_ctr32_encrypt_blocks: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L036ctr32_ret + jmp .L040ctr32_ret .align 16 -.L033ctr32_one_shortcut: +.L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -.L037ctr32_one: +.L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L041enc1_loop_7: +.L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L041enc1_loop_7 + jnz .L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp .L036ctr32_ret + jmp .L040ctr32_ret .align 16 -.L038ctr32_two: - call _aesni_encrypt3 +.L042ctr32_two: + call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L036ctr32_ret + jmp .L040ctr32_ret .align 16 -.L039ctr32_three: +.L043ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -925,9 +1006,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L036ctr32_ret + jmp .L040ctr32_ret .align 16 -.L040ctr32_four: +.L044ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -941,7 +1022,18 @@ aesni_ctr32_encrypt_blocks: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L036ctr32_ret: +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi @@ -966,12 +1058,12 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L042enc1_loop_8: +.L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L042enc1_loop_8 + jnz .L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -995,12 +1087,14 @@ aesni_xts_encrypt: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc .L043xts_enc_short - shrl $1,%ecx - movl %ecx,%ebx - jmp .L044xts_enc_loop6 + jc .L047xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L048xts_enc_loop6 .align 16 -.L044xts_enc_loop6: +.L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1036,6 +1130,7 @@ aesni_xts_encrypt: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1051,19 +1146,17 @@ aesni_xts_encrypt: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,220,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,220,217 +.byte 102,15,56,220,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,220,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,220,233 +.byte 102,15,56,220,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 .byte 102,15,56,220,241 - movups (%edx),%xmm0 .byte 102,15,56,220,249 call .L_aesni_encrypt6_enter movdqa 80(%esp),%xmm1 @@ -1088,26 +1181,25 @@ aesni_xts_encrypt: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc .L044xts_enc_loop6 - leal 1(,%ecx,2),%ecx + jnc .L048xts_enc_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L043xts_enc_short: +.L047xts_enc_short: addl $96,%eax - jz .L045xts_enc_done6x + jz .L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L046xts_enc_one + jb .L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L047xts_enc_two + je .L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1116,7 +1208,7 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L048xts_enc_three + jb .L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1126,7 +1218,7 @@ aesni_xts_encrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L049xts_enc_four + je .L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1158,9 +1250,9 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L050xts_enc_done + jmp .L054xts_enc_done .align 16 -.L046xts_enc_one: +.L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1168,37 +1260,36 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L051enc1_loop_9: +.L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L051enc1_loop_9 + jnz .L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L050xts_enc_done + jmp .L054xts_enc_done .align 16 -.L047xts_enc_two: +.L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - xorps %xmm4,%xmm4 - call _aesni_encrypt3 + call _aesni_encrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L050xts_enc_done + jmp .L054xts_enc_done .align 16 -.L048xts_enc_three: +.L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1216,9 +1307,9 @@ aesni_xts_encrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L050xts_enc_done + jmp .L054xts_enc_done .align 16 -.L049xts_enc_four: +.L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1240,28 +1331,28 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L050xts_enc_done + jmp .L054xts_enc_done .align 16 -.L045xts_enc_done6x: +.L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L052xts_enc_ret + jz .L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp .L053xts_enc_steal + jmp .L057xts_enc_steal .align 16 -.L050xts_enc_done: +.L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L052xts_enc_ret + jz .L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -.L053xts_enc_steal: +.L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1269,7 +1360,7 @@ aesni_xts_encrypt: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L053xts_enc_steal + jnz .L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1279,16 +1370,30 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L054enc1_loop_10: +.L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L054enc1_loop_10 + jnz .L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -.L052xts_enc_ret: +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1313,12 +1418,12 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L055enc1_loop_11: +.L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L055enc1_loop_11 + jnz .L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1347,12 +1452,14 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc .L056xts_dec_short - shrl $1,%ecx - movl %ecx,%ebx - jmp .L057xts_dec_loop6 + jc .L060xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L061xts_dec_loop6 .align 16 -.L057xts_dec_loop6: +.L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1388,6 +1495,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm7 movups (%esi),%xmm2 pxor %xmm1,%xmm7 + movl %ebx,%ecx movdqu 16(%esi),%xmm3 xorps %xmm0,%xmm2 movdqu 32(%esi),%xmm4 @@ -1403,19 +1511,17 @@ aesni_xts_decrypt: movdqa %xmm7,80(%esp) pxor %xmm1,%xmm7 movups 16(%ebp),%xmm1 - leal 32(%ebp),%edx pxor 16(%esp),%xmm3 -.byte 102,15,56,222,209 pxor 32(%esp),%xmm4 -.byte 102,15,56,222,217 +.byte 102,15,56,222,209 pxor 48(%esp),%xmm5 - decl %ecx -.byte 102,15,56,222,225 pxor 64(%esp),%xmm6 -.byte 102,15,56,222,233 +.byte 102,15,56,222,217 pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 .byte 102,15,56,222,241 - movups (%edx),%xmm0 .byte 102,15,56,222,249 call .L_aesni_decrypt6_enter movdqa 80(%esp),%xmm1 @@ -1440,26 +1546,25 @@ aesni_xts_decrypt: paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 - movl %ebx,%ecx pxor %xmm2,%xmm1 subl $96,%eax - jnc .L057xts_dec_loop6 - leal 1(,%ecx,2),%ecx + jnc .L061xts_dec_loop6 + movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L056xts_dec_short: +.L060xts_dec_short: addl $96,%eax - jz .L058xts_dec_done6x + jz .L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L059xts_dec_one + jb .L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L060xts_dec_two + je .L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1468,7 +1573,7 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L061xts_dec_three + jb .L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1478,7 +1583,7 @@ aesni_xts_decrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L062xts_dec_four + je .L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1510,9 +1615,9 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L063xts_dec_done + jmp .L067xts_dec_done .align 16 -.L059xts_dec_one: +.L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1520,36 +1625,36 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L064dec1_loop_12: +.L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L064dec1_loop_12 + jnz .L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L063xts_dec_done + jmp .L067xts_dec_done .align 16 -.L060xts_dec_two: +.L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 leal 32(%esi),%esi xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 - call _aesni_decrypt3 + call _aesni_decrypt2 xorps %xmm5,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L063xts_dec_done + jmp .L067xts_dec_done .align 16 -.L061xts_dec_three: +.L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1567,9 +1672,9 @@ aesni_xts_decrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L063xts_dec_done + jmp .L067xts_dec_done .align 16 -.L062xts_dec_four: +.L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1591,20 +1696,20 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L063xts_dec_done + jmp .L067xts_dec_done .align 16 -.L058xts_dec_done6x: +.L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L065xts_dec_ret + jz .L069xts_dec_ret movl %eax,112(%esp) - jmp .L066xts_dec_only_one_more + jmp .L070xts_dec_only_one_more .align 16 -.L063xts_dec_done: +.L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L065xts_dec_ret + jz .L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1614,7 +1719,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -.L066xts_dec_only_one_more: +.L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1628,16 +1733,16 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L067dec1_loop_13: +.L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L067dec1_loop_13 + jnz .L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -.L068xts_dec_steal: +.L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1645,7 +1750,7 @@ aesni_xts_decrypt: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L068xts_dec_steal + jnz .L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1655,16 +1760,30 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L069dec1_loop_14: +.L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L069dec1_loop_14 + jnz .L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -.L065xts_dec_ret: +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1690,7 +1809,7 @@ aesni_cbc_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz .L070cbc_abort + jz .L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1698,14 +1817,14 @@ aesni_cbc_encrypt: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je .L071cbc_decrypt + je .L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb .L072cbc_enc_tail + jb .L076cbc_enc_tail subl $16,%eax - jmp .L073cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L073cbc_enc_loop: +.L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1713,24 +1832,25 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -.L074enc1_loop_15: +.L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L074enc1_loop_15 + jnz .L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc .L073cbc_enc_loop + jnc .L077cbc_enc_loop addl $16,%eax - jnz .L072cbc_enc_tail + jnz .L076cbc_enc_tail movaps %xmm2,%xmm7 - jmp .L075cbc_ret -.L072cbc_enc_tail: + pxor %xmm2,%xmm2 + jmp .L079cbc_ret +.L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1741,20 +1861,20 @@ aesni_cbc_encrypt: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp .L073cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L071cbc_decrypt: +.L075cbc_decrypt: cmpl $80,%eax - jbe .L076cbc_dec_tail + jbe .L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp .L077cbc_dec_loop6_enter + jmp .L081cbc_dec_loop6_enter .align 16 -.L078cbc_dec_loop6: +.L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -.L077cbc_dec_loop6_enter: +.L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1784,28 +1904,28 @@ aesni_cbc_encrypt: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja .L078cbc_dec_loop6 + ja .L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle .L079cbc_dec_tail_collected + jle .L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -.L076cbc_dec_tail: +.L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe .L080cbc_dec_one + jbe .L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe .L081cbc_dec_two + jbe .L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe .L082cbc_dec_three + jbe .L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe .L083cbc_dec_four + jbe .L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1823,56 +1943,62 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 subl $80,%eax - jmp .L079cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L080cbc_dec_one: +.L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L084dec1_loop_16: +.L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L084dec1_loop_16 + jnz .L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp .L079cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L081cbc_dec_two: - xorps %xmm4,%xmm4 - call _aesni_decrypt3 +.L085cbc_dec_two: + call _aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp .L079cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L082cbc_dec_three: +.L086cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp .L079cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L083cbc_dec_four: +.L087cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1882,28 +2008,44 @@ aesni_cbc_encrypt: movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 subl $64,%eax -.L079cbc_dec_tail_collected: + jmp .L088cbc_dec_tail_collected +.align 16 +.L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L088cbc_dec_tail_collected: andl $15,%eax - jnz .L085cbc_dec_tail_partial + jnz .L090cbc_dec_tail_partial movups %xmm2,(%edi) - jmp .L075cbc_ret + pxor %xmm0,%xmm0 + jmp .L079cbc_ret .align 16 -.L085cbc_dec_tail_partial: +.L090cbc_dec_tail_partial: movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -.L075cbc_ret: + movdqa %xmm2,(%esp) +.L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 movups %xmm7,(%ebp) -.L070cbc_abort: + pxor %xmm7,%xmm7 +.L074cbc_abort: popl %edi popl %esi popl %ebx @@ -1913,52 +2055,62 @@ aesni_cbc_encrypt: .type _aesni_set_encrypt_key,@function .align 16 _aesni_set_encrypt_key: + pushl %ebp + pushl %ebx testl %eax,%eax - jz .L086bad_pointer + jz .L091bad_pointer testl %edx,%edx - jz .L086bad_pointer + jz .L091bad_pointer + call .L092pic +.L092pic: + popl %ebx + leal .Lkey_const-.L092pic(%ebx),%ebx + leal OPENSSL_ia32cap_P,%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp leal 16(%edx),%edx + andl $268437504,%ebp cmpl $256,%ecx - je .L08714rounds + je .L09314rounds cmpl $192,%ecx - je .L08812rounds + je .L09412rounds cmpl $128,%ecx - jne .L089bad_keybits + jne .L095bad_keybits .align 16 -.L09010rounds: +.L09610rounds: + cmpl $268435456,%ebp + je .L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call .L091key_128_cold + call .L098key_128_cold .byte 102,15,58,223,200,2 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,4 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,8 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,16 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,32 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,64 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,128 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,27 - call .L092key_128 + call .L099key_128 .byte 102,15,58,223,200,54 - call .L092key_128 + call .L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L092key_128: +.L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -.L091key_128_cold: +.L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -1967,38 +2119,91 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L08812rounds: +.L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L100good_key +.align 16 +.L09412rounds: movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call .L093key_192a_cold + call .L103key_192a_cold .byte 102,15,58,223,202,2 - call .L094key_192b + call .L104key_192b .byte 102,15,58,223,202,4 - call .L095key_192a + call .L105key_192a .byte 102,15,58,223,202,8 - call .L094key_192b + call .L104key_192b .byte 102,15,58,223,202,16 - call .L095key_192a + call .L105key_192a .byte 102,15,58,223,202,32 - call .L094key_192b + call .L104key_192b .byte 102,15,58,223,202,64 - call .L095key_192a + call .L105key_192a .byte 102,15,58,223,202,128 - call .L094key_192b + call .L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L095key_192a: +.L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 -.L093key_192a_cold: +.L103key_192a_cold: movaps %xmm2,%xmm5 -.L096key_192b_warm: +.L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2012,56 +2217,90 @@ _aesni_set_encrypt_key: pxor %xmm3,%xmm2 ret .align 16 -.L094key_192b: +.L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp .L096key_192b_warm + jmp .L106key_192b_warm +.align 16 +.L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L100good_key .align 16 -.L08714rounds: +.L09314rounds: movups 16(%eax),%xmm2 - movl $13,%ecx leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L10814rounds_alt + movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call .L097key_256a_cold + call .L109key_256a_cold .byte 102,15,58,223,200,1 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,2 - call .L099key_256a + call .L111key_256a .byte 102,15,58,223,200,2 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,4 - call .L099key_256a + call .L111key_256a .byte 102,15,58,223,200,4 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,8 - call .L099key_256a + call .L111key_256a .byte 102,15,58,223,200,8 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,16 - call .L099key_256a + call .L111key_256a .byte 102,15,58,223,200,16 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,32 - call .L099key_256a + call .L111key_256a .byte 102,15,58,223,200,32 - call .L098key_256b + call .L110key_256b .byte 102,15,58,223,202,64 - call .L099key_256a + call .L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L099key_256a: +.L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -.L097key_256a_cold: +.L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2070,7 +2309,7 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L098key_256b: +.L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2080,13 +2319,70 @@ _aesni_set_encrypt_key: shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret +.align 16 +.L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L112loop_key256 +.L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret .align 4 -.L086bad_pointer: +.L091bad_pointer: movl $-1,%eax + popl %ebx + popl %ebp ret .align 4 -.L089bad_keybits: +.L095bad_keybits: + pxor %xmm0,%xmm0 movl $-2,%eax + popl %ebx + popl %ebp ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key .globl aesni_set_encrypt_key @@ -2112,7 +2408,7 @@ aesni_set_decrypt_key: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz .L100dec_key_ret + jnz .L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2120,7 +2416,7 @@ aesni_set_decrypt_key: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -.L101dec_key_inverse: +.L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2130,15 +2426,24 @@ aesni_set_decrypt_key: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja .L101dec_key_inverse + ja .L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 xorl %eax,%eax -.L100dec_key_ret: +.L114dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/bn-586.s b/secure/lib/libcrypto/i386/bn-586.s index b40296e..3200c6d 100644 --- a/secure/lib/libcrypto/i386/bn-586.s +++ b/secure/lib/libcrypto/i386/bn-586.s @@ -1519,4 +1519,4 @@ bn_sub_part_words: popl %ebp ret .size bn_sub_part_words,.-.L_bn_sub_part_words_begin -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/cast-586.s b/secure/lib/libcrypto/i386/cast-586.s deleted file mode 100644 index 2d65735..0000000 --- a/secure/lib/libcrypto/i386/cast-586.s +++ /dev/null @@ -1,934 +0,0 @@ - # $FreeBSD$ -.file "cast-586.s" -.text -.globl CAST_encrypt -.type CAST_encrypt,@function -.align 16 -CAST_encrypt: -.L_CAST_encrypt_begin: - - pushl %ebp - pushl %ebx - movl 12(%esp),%ebx - movl 16(%esp),%ebp - pushl %esi - pushl %edi - - movl (%ebx),%edi - movl 4(%ebx),%esi - - movl 128(%ebp),%eax - pushl %eax - xorl %eax,%eax - - movl (%ebp),%edx - movl 4(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 8(%ebp),%edx - movl 12(%ebp),%ecx - xorl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%esi - - movl 16(%ebp),%edx - movl 20(%ebp),%ecx - subl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%edi - - movl 24(%ebp),%edx - movl 28(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi - - movl 32(%ebp),%edx - movl 36(%ebp),%ecx - xorl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%edi - - movl 40(%ebp),%edx - movl 44(%ebp),%ecx - subl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%esi - - movl 48(%ebp),%edx - movl 52(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 56(%ebp),%edx - movl 60(%ebp),%ecx - xorl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%esi - - movl 64(%ebp),%edx - movl 68(%ebp),%ecx - subl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%edi - - movl 72(%ebp),%edx - movl 76(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi - - movl 80(%ebp),%edx - movl 84(%ebp),%ecx - xorl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%edi - - movl 88(%ebp),%edx - movl 92(%ebp),%ecx - subl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%esi - - popl %edx - orl %edx,%edx - jnz .L000cast_enc_done - - movl 96(%ebp),%edx - movl 100(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 104(%ebp),%edx - movl 108(%ebp),%ecx - xorl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%esi - - movl 112(%ebp),%edx - movl 116(%ebp),%ecx - subl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%edi - - movl 120(%ebp),%edx - movl 124(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi -.L000cast_enc_done: - nop - movl 20(%esp),%eax - movl %edi,4(%eax) - movl %esi,(%eax) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size CAST_encrypt,.-.L_CAST_encrypt_begin -.globl CAST_decrypt -.type CAST_decrypt,@function -.align 16 -CAST_decrypt: -.L_CAST_decrypt_begin: - - pushl %ebp - pushl %ebx - movl 12(%esp),%ebx - movl 16(%esp),%ebp - pushl %esi - pushl %edi - - movl (%ebx),%edi - movl 4(%ebx),%esi - - movl 128(%ebp),%eax - orl %eax,%eax - jnz .L001cast_dec_skip - xorl %eax,%eax - - movl 120(%ebp),%edx - movl 124(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 112(%ebp),%edx - movl 116(%ebp),%ecx - subl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%esi - - movl 104(%ebp),%edx - movl 108(%ebp),%ecx - xorl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%edi - - movl 96(%ebp),%edx - movl 100(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi -.L001cast_dec_skip: - - movl 88(%ebp),%edx - movl 92(%ebp),%ecx - subl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%edi - - movl 80(%ebp),%edx - movl 84(%ebp),%ecx - xorl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%esi - - movl 72(%ebp),%edx - movl 76(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 64(%ebp),%edx - movl 68(%ebp),%ecx - subl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%esi - - movl 56(%ebp),%edx - movl 60(%ebp),%ecx - xorl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%edi - - movl 48(%ebp),%edx - movl 52(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi - - movl 40(%ebp),%edx - movl 44(%ebp),%ecx - subl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%edi - - movl 32(%ebp),%edx - movl 36(%ebp),%ecx - xorl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%esi - - movl 24(%ebp),%edx - movl 28(%ebp),%ecx - addl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%edi - - movl 16(%ebp),%edx - movl 20(%ebp),%ecx - subl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - addl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - subl %ebx,%ecx - xorl %ecx,%esi - - movl 8(%ebp),%edx - movl 12(%ebp),%ecx - xorl %esi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - subl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - addl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - xorl %ebx,%ecx - xorl %ecx,%edi - - movl (%ebp),%edx - movl 4(%ebp),%ecx - addl %edi,%edx - roll %cl,%edx - movl %edx,%ebx - xorl %ecx,%ecx - movb %dh,%cl - andl $255,%ebx - shrl $16,%edx - xorl %eax,%eax - movb %dh,%al - andl $255,%edx - movl CAST_S_table0(,%ecx,4),%ecx - movl CAST_S_table1(,%ebx,4),%ebx - xorl %ebx,%ecx - movl CAST_S_table2(,%eax,4),%ebx - subl %ebx,%ecx - movl CAST_S_table3(,%edx,4),%ebx - addl %ebx,%ecx - xorl %ecx,%esi - nop - movl 20(%esp),%eax - movl %edi,4(%eax) - movl %esi,(%eax) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.size CAST_decrypt,.-.L_CAST_decrypt_begin -.globl CAST_cbc_encrypt -.type CAST_cbc_encrypt,@function -.align 16 -CAST_cbc_encrypt: -.L_CAST_cbc_encrypt_begin: - - pushl %ebp - pushl %ebx - pushl %esi - pushl %edi - movl 28(%esp),%ebp - - movl 36(%esp),%ebx - movl (%ebx),%esi - movl 4(%ebx),%edi - pushl %edi - pushl %esi - pushl %edi - pushl %esi - movl %esp,%ebx - movl 36(%esp),%esi - movl 40(%esp),%edi - - movl 56(%esp),%ecx - - movl 48(%esp),%eax - pushl %eax - pushl %ebx - cmpl $0,%ecx - jz .L002decrypt - andl $4294967288,%ebp - movl 8(%esp),%eax - movl 12(%esp),%ebx - jz .L003encrypt_finish -.L004encrypt_loop: - movl (%esi),%ecx - movl 4(%esi),%edx - xorl %ecx,%eax - xorl %edx,%ebx - bswap %eax - bswap %ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_CAST_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - bswap %eax - bswap %ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz .L004encrypt_loop -.L003encrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz .L005finish - call .L006PIC_point -.L006PIC_point: - popl %edx - leal .L007cbc_enc_jmp_table-.L006PIC_point(%edx),%ecx - movl (%ecx,%ebp,4),%ebp - addl %edx,%ebp - xorl %ecx,%ecx - xorl %edx,%edx - jmp *%ebp -.L008ej7: - movb 6(%esi),%dh - shll $8,%edx -.L009ej6: - movb 5(%esi),%dh -.L010ej5: - movb 4(%esi),%dl -.L011ej4: - movl (%esi),%ecx - jmp .L012ejend -.L013ej3: - movb 2(%esi),%ch - shll $8,%ecx -.L014ej2: - movb 1(%esi),%ch -.L015ej1: - movb (%esi),%cl -.L012ejend: - xorl %ecx,%eax - xorl %edx,%ebx - bswap %eax - bswap %ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_CAST_encrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - bswap %eax - bswap %ebx - movl %eax,(%edi) - movl %ebx,4(%edi) - jmp .L005finish -.L002decrypt: - andl $4294967288,%ebp - movl 16(%esp),%eax - movl 20(%esp),%ebx - jz .L016decrypt_finish -.L017decrypt_loop: - movl (%esi),%eax - movl 4(%esi),%ebx - bswap %eax - bswap %ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_CAST_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - bswap %eax - bswap %ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - movl (%esi),%eax - movl 4(%esi),%ebx - movl %ecx,(%edi) - movl %edx,4(%edi) - movl %eax,16(%esp) - movl %ebx,20(%esp) - addl $8,%esi - addl $8,%edi - subl $8,%ebp - jnz .L017decrypt_loop -.L016decrypt_finish: - movl 52(%esp),%ebp - andl $7,%ebp - jz .L005finish - movl (%esi),%eax - movl 4(%esi),%ebx - bswap %eax - bswap %ebx - movl %eax,8(%esp) - movl %ebx,12(%esp) - call .L_CAST_decrypt_begin - movl 8(%esp),%eax - movl 12(%esp),%ebx - bswap %eax - bswap %ebx - movl 16(%esp),%ecx - movl 20(%esp),%edx - xorl %eax,%ecx - xorl %ebx,%edx - movl (%esi),%eax - movl 4(%esi),%ebx -.L018dj7: - rorl $16,%edx - movb %dl,6(%edi) - shrl $16,%edx -.L019dj6: - movb %dh,5(%edi) -.L020dj5: - movb %dl,4(%edi) -.L021dj4: - movl %ecx,(%edi) - jmp .L022djend -.L023dj3: - rorl $16,%ecx - movb %cl,2(%edi) - shll $16,%ecx -.L024dj2: - movb %ch,1(%esi) -.L025dj1: - movb %cl,(%esi) -.L022djend: - jmp .L005finish -.L005finish: - movl 60(%esp),%ecx - addl $24,%esp - movl %eax,(%ecx) - movl %ebx,4(%ecx) - popl %edi - popl %esi - popl %ebx - popl %ebp - ret -.align 64 -.L007cbc_enc_jmp_table: -.long 0 -.long .L015ej1-.L006PIC_point -.long .L014ej2-.L006PIC_point -.long .L013ej3-.L006PIC_point -.long .L011ej4-.L006PIC_point -.long .L010ej5-.L006PIC_point -.long .L009ej6-.L006PIC_point -.long .L008ej7-.L006PIC_point -.align 64 -.size CAST_cbc_encrypt,.-.L_CAST_cbc_encrypt_begin diff --git a/secure/lib/libcrypto/i386/des-586.s b/secure/lib/libcrypto/i386/des-586.s index 868b2ca..83bf07a 100644 --- a/secure/lib/libcrypto/i386/des-586.s +++ b/secure/lib/libcrypto/i386/des-586.s @@ -1001,7 +1001,7 @@ DES_encrypt1: call .L000pic_point .L000pic_point: popl %ebp - leal DES_SPtrans-.L000pic_point(%ebp),%ebp + leal .Ldes_sptrans-.L000pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L001decrypt @@ -1078,7 +1078,7 @@ DES_encrypt2: call .L003pic_point .L003pic_point: popl %ebp - leal DES_SPtrans-.L003pic_point(%ebp),%ebp + leal .Ldes_sptrans-.L003pic_point(%ebp),%ebp movl 24(%esp),%ecx cmpl $0,%ebx je .L004decrypt @@ -1708,6 +1708,7 @@ DES_ede3_cbc_encrypt: .size DES_ede3_cbc_encrypt,.-.L_DES_ede3_cbc_encrypt_begin .align 64 DES_SPtrans: +.Ldes_sptrans: .long 34080768,524288,33554434,34080770 .long 33554432,526338,524290,33554434 .long 526338,34080768,34078720,2050 diff --git a/secure/lib/libcrypto/i386/ghash-x86.s b/secure/lib/libcrypto/i386/ghash-x86.s index a200a97..53d5e3f 100644 --- a/secure/lib/libcrypto/i386/ghash-x86.s +++ b/secure/lib/libcrypto/i386/ghash-x86.s @@ -946,27 +946,34 @@ gcm_init_clmul: pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 - movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 + psrldq $8,%xmm3 pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + pshufd $78,%xmm2,%xmm3 + pshufd $78,%xmm0,%xmm4 + pxor %xmm2,%xmm3 movdqu %xmm2,(%edx) + pxor %xmm0,%xmm4 movdqu %xmm0,16(%edx) +.byte 102,15,58,15,227,8 + movdqu %xmm4,32(%edx) ret .size gcm_init_clmul,.-.L_gcm_init_clmul_begin .globl gcm_gmult_clmul @@ -984,11 +991,10 @@ gcm_gmult_clmul: movdqa (%ecx),%xmm5 movups (%edx),%xmm2 .byte 102,15,56,0,197 + movups 32(%edx),%xmm4 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 @@ -999,25 +1005,26 @@ gcm_gmult_clmul: pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 - movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 + psrldq $8,%xmm3 pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%eax) ret @@ -1049,127 +1056,115 @@ gcm_ghash_clmul: movdqu 16(%esi),%xmm6 .byte 102,15,56,0,221 .byte 102,15,56,0,245 + movdqu 32(%edx),%xmm5 pxor %xmm3,%xmm0 - movdqa %xmm6,%xmm7 pshufd $78,%xmm6,%xmm3 - pshufd $78,%xmm2,%xmm4 + movdqa %xmm6,%xmm7 pxor %xmm6,%xmm3 - pxor %xmm2,%xmm4 + leal 32(%esi),%esi .byte 102,15,58,68,242,0 .byte 102,15,58,68,250,17 -.byte 102,15,58,68,220,0 - xorps %xmm6,%xmm3 - xorps %xmm7,%xmm3 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm7 - pxor %xmm4,%xmm6 +.byte 102,15,58,68,221,0 movups 16(%edx),%xmm2 - leal 32(%esi),%esi + nop subl $32,%ebx jbe .L014even_tail + jmp .L015mod_loop +.align 32 .L015mod_loop: + pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 + pxor %xmm0,%xmm4 + nop .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 - xorps %xmm0,%xmm3 - xorps %xmm1,%xmm3 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - movdqu (%esi),%xmm3 +.byte 102,15,58,68,229,16 movups (%edx),%xmm2 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 + xorps %xmm6,%xmm0 + movdqa (%ecx),%xmm5 + xorps %xmm7,%xmm1 + movdqu (%esi),%xmm7 + pxor %xmm0,%xmm3 movdqu 16(%esi),%xmm6 -.byte 102,15,56,0,221 + pxor %xmm1,%xmm3 +.byte 102,15,56,0,253 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm3,%xmm0 .byte 102,15,56,0,245 - movdqa %xmm6,%xmm5 + pxor %xmm7,%xmm1 movdqa %xmm6,%xmm7 - pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 .byte 102,15,58,68,242,0 + movups 32(%edx),%xmm5 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pshufd $78,%xmm5,%xmm3 + psrldq $8,%xmm3 + pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + pshufd $78,%xmm7,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $1,%xmm0 + pxor %xmm7,%xmm3 pxor %xmm4,%xmm1 - pxor %xmm5,%xmm3 - pshufd $78,%xmm2,%xmm5 - pxor %xmm2,%xmm5 .byte 102,15,58,68,250,17 - movdqa %xmm0,%xmm4 + movups 16(%edx),%xmm2 + pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 - psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .byte 102,15,58,68,221,0 - movups 16(%edx),%xmm2 - xorps %xmm6,%xmm3 - xorps %xmm7,%xmm3 - movdqa %xmm3,%xmm5 - psrldq $8,%xmm3 - pslldq $8,%xmm5 - pxor %xmm3,%xmm7 - pxor %xmm5,%xmm6 - movdqa (%ecx),%xmm5 leal 32(%esi),%esi subl $32,%ebx ja .L015mod_loop .L014even_tail: + pshufd $78,%xmm0,%xmm4 movdqa %xmm0,%xmm1 - pshufd $78,%xmm0,%xmm3 - pshufd $78,%xmm2,%xmm4 - pxor %xmm0,%xmm3 - pxor %xmm2,%xmm4 + pxor %xmm0,%xmm4 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 -.byte 102,15,58,68,220,0 - xorps %xmm0,%xmm3 - xorps %xmm1,%xmm3 - movdqa %xmm3,%xmm4 - psrldq $8,%xmm3 - pslldq $8,%xmm4 - pxor %xmm3,%xmm1 - pxor %xmm4,%xmm0 - pxor %xmm6,%xmm0 - pxor %xmm7,%xmm1 - movdqa %xmm0,%xmm3 - psllq $1,%xmm0 +.byte 102,15,58,68,229,16 + movdqa (%ecx),%xmm5 + xorps %xmm6,%xmm0 + xorps %xmm7,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm1,%xmm3 + pxor %xmm3,%xmm4 + movdqa %xmm4,%xmm3 + psrldq $8,%xmm4 + pslldq $8,%xmm3 + pxor %xmm4,%xmm1 pxor %xmm3,%xmm0 + movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 - movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 + psrldq $8,%xmm3 pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 testl %ebx,%ebx jnz .L016done movups (%edx),%xmm2 @@ -1192,25 +1187,26 @@ gcm_ghash_clmul: pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 + movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 - psllq $1,%xmm0 - pxor %xmm3,%xmm0 psllq $5,%xmm0 + pxor %xmm0,%xmm3 + psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 - movdqa %xmm0,%xmm4 + movdqa %xmm0,%xmm3 pslldq $8,%xmm0 - psrldq $8,%xmm4 - pxor %xmm3,%xmm0 - pxor %xmm4,%xmm1 - movdqa %xmm0,%xmm4 - psrlq $5,%xmm0 + psrldq $8,%xmm3 pxor %xmm4,%xmm0 + pxor %xmm3,%xmm1 + movdqa %xmm0,%xmm4 psrlq $1,%xmm0 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + psrlq $5,%xmm0 pxor %xmm4,%xmm0 - pxor %xmm1,%xmm4 psrlq $1,%xmm0 - pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 .L016done: .byte 102,15,56,0,197 movdqu %xmm0,(%eax) @@ -1225,12 +1221,6 @@ gcm_ghash_clmul: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 .align 64 -.Lrem_4bit: -.long 0,0,0,471859200,0,943718400,0,610271232 -.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 -.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 -.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 -.align 64 .Lrem_8bit: .value 0,450,900,582,1800,1738,1164,1358 .value 3600,4050,3476,3158,2328,2266,2716,2910 @@ -1264,6 +1254,12 @@ gcm_ghash_clmul: .value 42960,42514,42068,42390,41176,41242,41820,41630 .value 46560,46114,46692,47014,45800,45866,45420,45230 .value 48112,47666,47220,47542,48376,48442,49020,48830 +.align 64 +.Lrem_4bit: +.long 0,0,0,471859200,0,943718400,0,610271232 +.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 +.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 +.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 .byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 .byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 diff --git a/secure/lib/libcrypto/i386/rc4-586.s b/secure/lib/libcrypto/i386/rc4-586.s index c1f70a6..e9603ae 100644 --- a/secure/lib/libcrypto/i386/rc4-586.s +++ b/secure/lib/libcrypto/i386/rc4-586.s @@ -30,8 +30,8 @@ RC4: movl (%edi,%eax,4),%ecx andl $-4,%edx jz .L002loop1 - testl $-8,%edx movl %ebp,32(%esp) + testl $-8,%edx jz .L003go4loop4 leal OPENSSL_ia32cap_P,%ebp btl $26,(%ebp) @@ -370,4 +370,4 @@ RC4_options: .byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 .size RC4_options,.-.L_RC4_options_begin -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/sha1-586.s b/secure/lib/libcrypto/i386/sha1-586.s index ffb8883..74b1ab4 100644 --- a/secure/lib/libcrypto/i386/sha1-586.s +++ b/secure/lib/libcrypto/i386/sha1-586.s @@ -19,8 +19,11 @@ sha1_block_data_order: movl 4(%esi),%edx testl $512,%edx jz .L001x86 + movl 8(%esi),%ecx testl $16777216,%eax jz .L001x86 + testl $536870912,%ecx + jnz .Lshaext_shortcut jmp .Lssse3_shortcut .align 16 .L001x86: @@ -1389,9 +1392,9 @@ sha1_block_data_order: popl %ebp ret .size sha1_block_data_order,.-.L_sha1_block_data_order_begin -.type _sha1_block_data_order_ssse3,@function +.type _sha1_block_data_order_shaext,@function .align 16 -_sha1_block_data_order_ssse3: +_sha1_block_data_order_shaext: pushl %ebp pushl %ebx pushl %esi @@ -1400,6 +1403,176 @@ _sha1_block_data_order_ssse3: .L003pic_point: popl %ebp leal .LK_XX_XX-.L003pic_point(%ebp),%ebp +.Lshaext_shortcut: + movl 20(%esp),%edi + movl %esp,%ebx + movl 24(%esp),%esi + movl 28(%esp),%ecx + subl $32,%esp + movdqu (%edi),%xmm0 + movd 16(%edi),%xmm1 + andl $-32,%esp + movdqa 80(%ebp),%xmm3 + movdqu (%esi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%esi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 +.byte 102,15,56,0,251 + jmp .L004loop_shaext +.align 16 +.L004loop_shaext: + decl %ecx + leal 64(%esi),%eax + movdqa %xmm1,(%esp) + paddd %xmm4,%xmm1 + cmovnel %eax,%esi + movdqa %xmm0,16(%esp) +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%esi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%esi),%xmm5 +.byte 102,15,56,0,227 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%esi),%xmm6 +.byte 102,15,56,0,235 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%esi),%xmm7 +.byte 102,15,56,0,243 + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 + movdqa (%esp),%xmm2 +.byte 102,15,56,0,251 +.byte 15,56,200,202 + paddd 16(%esp),%xmm0 + jnz .L004loop_shaext + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%edi) + movd %xmm1,16(%edi) + movl %ebx,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _sha1_block_data_order_shaext,.-_sha1_block_data_order_shaext +.type _sha1_block_data_order_ssse3,@function +.align 16 +_sha1_block_data_order_ssse3: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + call .L005pic_point +.L005pic_point: + popl %ebp + leal .LK_XX_XX-.L005pic_point(%ebp),%ebp .Lssse3_shortcut: movdqa (%ebp),%xmm7 movdqa 16(%ebp),%xmm0 @@ -1447,936 +1620,917 @@ _sha1_block_data_order_ssse3: movdqa %xmm1,16(%esp) psubd %xmm7,%xmm1 movdqa %xmm2,32(%esp) + movl %ecx,%ebp psubd %xmm7,%xmm2 - movdqa %xmm1,%xmm4 - jmp .L004loop + xorl %edx,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebp,%esi + jmp .L006loop .align 16 -.L004loop: - addl (%esp),%edi - xorl %edx,%ecx -.byte 102,15,58,15,224,8 - movdqa %xmm3,%xmm6 +.L006loop: + rorl $2,%ebx + xorl %edx,%esi movl %eax,%ebp - roll $5,%eax + punpcklqdq %xmm1,%xmm4 + movdqa %xmm3,%xmm6 + addl (%esp),%edi + xorl %ecx,%ebx paddd %xmm3,%xmm7 movdqa %xmm0,64(%esp) - andl %ecx,%esi - xorl %edx,%ecx + roll $5,%eax + addl %esi,%edi psrldq $4,%xmm6 - xorl %edx,%esi - addl %eax,%edi + andl %ebx,%ebp + xorl %ecx,%ebx pxor %xmm0,%xmm4 - rorl $2,%ebx - addl %esi,%edi + addl %eax,%edi + rorl $7,%eax pxor %xmm2,%xmm6 - addl 4(%esp),%edx - xorl %ecx,%ebx + xorl %ecx,%ebp movl %edi,%esi - roll $5,%edi + addl 4(%esp),%edx pxor %xmm6,%xmm4 - andl %ebx,%ebp - xorl %ecx,%ebx + xorl %ebx,%eax + roll $5,%edi movdqa %xmm7,48(%esp) - xorl %ecx,%ebp - addl %edi,%edx - movdqa %xmm4,%xmm0 - movdqa %xmm4,%xmm6 - rorl $7,%eax addl %ebp,%edx - addl 8(%esp),%ecx + andl %eax,%esi + movdqa %xmm4,%xmm0 xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + movdqa %xmm4,%xmm6 + xorl %ebx,%esi pslldq $12,%xmm0 paddd %xmm4,%xmm4 movl %edx,%ebp - roll $5,%edx - andl %eax,%esi - xorl %ebx,%eax + addl 8(%esp),%ecx psrld $31,%xmm6 - xorl %ebx,%esi - addl %edx,%ecx + xorl %eax,%edi + roll $5,%edx movdqa %xmm0,%xmm7 - rorl $7,%edi addl %esi,%ecx + andl %edi,%ebp + xorl %eax,%edi psrld $30,%xmm0 + addl %edx,%ecx + rorl $7,%edx por %xmm6,%xmm4 - addl 12(%esp),%ebx - xorl %eax,%edi + xorl %eax,%ebp movl %ecx,%esi - roll $5,%ecx + addl 12(%esp),%ebx pslld $2,%xmm7 + xorl %edi,%edx + roll $5,%ecx pxor %xmm0,%xmm4 - andl %edi,%ebp - xorl %eax,%edi movdqa 96(%esp),%xmm0 - xorl %eax,%ebp - addl %ecx,%ebx - pxor %xmm7,%xmm4 - movdqa %xmm2,%xmm5 - rorl $7,%edx addl %ebp,%ebx - addl 16(%esp),%eax + andl %edx,%esi + pxor %xmm7,%xmm4 + pshufd $238,%xmm1,%xmm5 xorl %edi,%edx -.byte 102,15,58,15,233,8 - movdqa %xmm4,%xmm7 + addl %ecx,%ebx + rorl $7,%ecx + xorl %edi,%esi movl %ebx,%ebp - roll $5,%ebx + punpcklqdq %xmm2,%xmm5 + movdqa %xmm4,%xmm7 + addl 16(%esp),%eax + xorl %edx,%ecx paddd %xmm4,%xmm0 movdqa %xmm1,80(%esp) - andl %edx,%esi - xorl %edi,%edx + roll $5,%ebx + addl %esi,%eax psrldq $4,%xmm7 - xorl %edi,%esi - addl %ebx,%eax + andl %ecx,%ebp + xorl %edx,%ecx pxor %xmm1,%xmm5 - rorl $7,%ecx - addl %esi,%eax + addl %ebx,%eax + rorl $7,%ebx pxor %xmm3,%xmm7 - addl 20(%esp),%edi - xorl %edx,%ecx + xorl %edx,%ebp movl %eax,%esi - roll $5,%eax + addl 20(%esp),%edi pxor %xmm7,%xmm5 - andl %ecx,%ebp - xorl %edx,%ecx + xorl %ecx,%ebx + roll $5,%eax movdqa %xmm0,(%esp) - xorl %edx,%ebp - addl %eax,%edi - movdqa %xmm5,%xmm1 - movdqa %xmm5,%xmm7 - rorl $7,%ebx addl %ebp,%edi - addl 24(%esp),%edx + andl %ebx,%esi + movdqa %xmm5,%xmm1 xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + movdqa %xmm5,%xmm7 + xorl %ecx,%esi pslldq $12,%xmm1 paddd %xmm5,%xmm5 movl %edi,%ebp - roll $5,%edi - andl %ebx,%esi - xorl %ecx,%ebx + addl 24(%esp),%edx psrld $31,%xmm7 - xorl %ecx,%esi - addl %edi,%edx + xorl %ebx,%eax + roll $5,%edi movdqa %xmm1,%xmm0 - rorl $7,%eax addl %esi,%edx + andl %eax,%ebp + xorl %ebx,%eax psrld $30,%xmm1 + addl %edi,%edx + rorl $7,%edi por %xmm7,%xmm5 - addl 28(%esp),%ecx - xorl %ebx,%eax + xorl %ebx,%ebp movl %edx,%esi - roll $5,%edx + addl 28(%esp),%ecx pslld $2,%xmm0 + xorl %eax,%edi + roll $5,%edx pxor %xmm1,%xmm5 - andl %eax,%ebp - xorl %ebx,%eax movdqa 112(%esp),%xmm1 - xorl %ebx,%ebp - addl %edx,%ecx - pxor %xmm0,%xmm5 - movdqa %xmm3,%xmm6 - rorl $7,%edi addl %ebp,%ecx - addl 32(%esp),%ebx + andl %edi,%esi + pxor %xmm0,%xmm5 + pshufd $238,%xmm2,%xmm6 xorl %eax,%edi -.byte 102,15,58,15,242,8 - movdqa %xmm5,%xmm0 + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi movl %ecx,%ebp - roll $5,%ecx + punpcklqdq %xmm3,%xmm6 + movdqa %xmm5,%xmm0 + addl 32(%esp),%ebx + xorl %edi,%edx paddd %xmm5,%xmm1 movdqa %xmm2,96(%esp) - andl %edi,%esi - xorl %eax,%edi + roll $5,%ecx + addl %esi,%ebx psrldq $4,%xmm0 - xorl %eax,%esi - addl %ecx,%ebx + andl %edx,%ebp + xorl %edi,%edx pxor %xmm2,%xmm6 - rorl $7,%edx - addl %esi,%ebx + addl %ecx,%ebx + rorl $7,%ecx pxor %xmm4,%xmm0 - addl 36(%esp),%eax - xorl %edi,%edx + xorl %edi,%ebp movl %ebx,%esi - roll $5,%ebx + addl 36(%esp),%eax pxor %xmm0,%xmm6 - andl %edx,%ebp - xorl %edi,%edx + xorl %edx,%ecx + roll $5,%ebx movdqa %xmm1,16(%esp) - xorl %edi,%ebp - addl %ebx,%eax - movdqa %xmm6,%xmm2 - movdqa %xmm6,%xmm0 - rorl $7,%ecx addl %ebp,%eax - addl 40(%esp),%edi + andl %ecx,%esi + movdqa %xmm6,%xmm2 xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm0 + xorl %edx,%esi pslldq $12,%xmm2 paddd %xmm6,%xmm6 movl %eax,%ebp - roll $5,%eax - andl %ecx,%esi - xorl %edx,%ecx + addl 40(%esp),%edi psrld $31,%xmm0 - xorl %edx,%esi - addl %eax,%edi + xorl %ecx,%ebx + roll $5,%eax movdqa %xmm2,%xmm1 - rorl $7,%ebx addl %esi,%edi + andl %ebx,%ebp + xorl %ecx,%ebx psrld $30,%xmm2 + addl %eax,%edi + rorl $7,%eax por %xmm0,%xmm6 - addl 44(%esp),%edx - xorl %ecx,%ebx + xorl %ecx,%ebp movdqa 64(%esp),%xmm0 movl %edi,%esi - roll $5,%edi + addl 44(%esp),%edx pslld $2,%xmm1 + xorl %ebx,%eax + roll $5,%edi pxor %xmm2,%xmm6 - andl %ebx,%ebp - xorl %ecx,%ebx movdqa 112(%esp),%xmm2 - xorl %ecx,%ebp - addl %edi,%edx - pxor %xmm1,%xmm6 - movdqa %xmm4,%xmm7 - rorl $7,%eax addl %ebp,%edx - addl 48(%esp),%ecx + andl %eax,%esi + pxor %xmm1,%xmm6 + pshufd $238,%xmm3,%xmm7 xorl %ebx,%eax -.byte 102,15,58,15,251,8 - movdqa %xmm6,%xmm1 + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%esi movl %edx,%ebp - roll $5,%edx + punpcklqdq %xmm4,%xmm7 + movdqa %xmm6,%xmm1 + addl 48(%esp),%ecx + xorl %eax,%edi paddd %xmm6,%xmm2 movdqa %xmm3,64(%esp) - andl %eax,%esi - xorl %ebx,%eax + roll $5,%edx + addl %esi,%ecx psrldq $4,%xmm1 - xorl %ebx,%esi - addl %edx,%ecx + andl %edi,%ebp + xorl %eax,%edi pxor %xmm3,%xmm7 - rorl $7,%edi - addl %esi,%ecx + addl %edx,%ecx + rorl $7,%edx pxor %xmm5,%xmm1 - addl 52(%esp),%ebx - xorl %eax,%edi + xorl %eax,%ebp movl %ecx,%esi - roll $5,%ecx + addl 52(%esp),%ebx pxor %xmm1,%xmm7 - andl %edi,%ebp - xorl %eax,%edi + xorl %edi,%edx + roll $5,%ecx movdqa %xmm2,32(%esp) - xorl %eax,%ebp - addl %ecx,%ebx - movdqa %xmm7,%xmm3 - movdqa %xmm7,%xmm1 - rorl $7,%edx addl %ebp,%ebx - addl 56(%esp),%eax + andl %edx,%esi + movdqa %xmm7,%xmm3 xorl %edi,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm1 + xorl %edi,%esi pslldq $12,%xmm3 paddd %xmm7,%xmm7 movl %ebx,%ebp - roll $5,%ebx - andl %edx,%esi - xorl %edi,%edx + addl 56(%esp),%eax psrld $31,%xmm1 - xorl %edi,%esi - addl %ebx,%eax + xorl %edx,%ecx + roll $5,%ebx movdqa %xmm3,%xmm2 - rorl $7,%ecx addl %esi,%eax + andl %ecx,%ebp + xorl %edx,%ecx psrld $30,%xmm3 + addl %ebx,%eax + rorl $7,%ebx por %xmm1,%xmm7 - addl 60(%esp),%edi - xorl %edx,%ecx + xorl %edx,%ebp movdqa 80(%esp),%xmm1 movl %eax,%esi - roll $5,%eax + addl 60(%esp),%edi pslld $2,%xmm2 + xorl %ecx,%ebx + roll $5,%eax pxor %xmm3,%xmm7 - andl %ecx,%ebp - xorl %edx,%ecx movdqa 112(%esp),%xmm3 - xorl %edx,%ebp - addl %eax,%edi - pxor %xmm2,%xmm7 - rorl $7,%ebx addl %ebp,%edi - movdqa %xmm7,%xmm2 - addl (%esp),%edx - pxor %xmm4,%xmm0 -.byte 102,15,58,15,214,8 + andl %ebx,%esi + pxor %xmm2,%xmm7 + pshufd $238,%xmm6,%xmm2 xorl %ecx,%ebx + addl %eax,%edi + rorl $7,%eax + pxor %xmm4,%xmm0 + punpcklqdq %xmm7,%xmm2 + xorl %ecx,%esi movl %edi,%ebp - roll $5,%edi + addl (%esp),%edx pxor %xmm1,%xmm0 movdqa %xmm4,80(%esp) - andl %ebx,%esi - xorl %ecx,%ebx + xorl %ebx,%eax + roll $5,%edi movdqa %xmm3,%xmm4 + addl %esi,%edx paddd %xmm7,%xmm3 - xorl %ecx,%esi - addl %edi,%edx + andl %eax,%ebp pxor %xmm2,%xmm0 - rorl $7,%eax - addl %esi,%edx - addl 4(%esp),%ecx xorl %ebx,%eax + addl %edi,%edx + rorl $7,%edi + xorl %ebx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) movl %edx,%esi + addl 4(%esp),%ecx + xorl %eax,%edi roll $5,%edx - andl %eax,%ebp - xorl %ebx,%eax pslld $2,%xmm0 - xorl %ebx,%ebp - addl %edx,%ecx - psrld $30,%xmm2 - rorl $7,%edi addl %ebp,%ecx - addl 8(%esp),%ebx + andl %edi,%esi + psrld $30,%xmm2 xorl %eax,%edi + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi movl %ecx,%ebp + addl 8(%esp),%ebx + xorl %edi,%edx roll $5,%ecx por %xmm2,%xmm0 - andl %edi,%esi - xorl %eax,%edi + addl %esi,%ebx + andl %edx,%ebp movdqa 96(%esp),%xmm2 - xorl %eax,%esi + xorl %edi,%edx addl %ecx,%ebx - rorl $7,%edx - addl %esi,%ebx addl 12(%esp),%eax - movdqa %xmm0,%xmm3 - xorl %edi,%edx + xorl %edi,%ebp movl %ebx,%esi + pshufd $238,%xmm7,%xmm3 roll $5,%ebx - andl %edx,%ebp - xorl %edi,%edx - xorl %edi,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax addl 16(%esp),%edi pxor %xmm5,%xmm1 -.byte 102,15,58,15,223,8 - xorl %edx,%esi + punpcklqdq %xmm0,%xmm3 + xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm2,%xmm1 movdqa %xmm5,96(%esp) - xorl %ecx,%esi - addl %eax,%edi + addl %esi,%edi + xorl %ecx,%ebp movdqa %xmm4,%xmm5 - paddd %xmm0,%xmm4 rorl $7,%ebx - addl %esi,%edi + paddd %xmm0,%xmm4 + addl %eax,%edi pxor %xmm3,%xmm1 addl 20(%esp),%edx - xorl %ecx,%ebp + xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) - xorl %ebx,%ebp - addl %edi,%edx - rorl $7,%eax addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm1 addl 24(%esp),%ecx - xorl %ebx,%esi + xorl %eax,%esi psrld $30,%xmm3 movl %edx,%ebp roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%edi addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx por %xmm3,%xmm1 addl 28(%esp),%ebx - xorl %eax,%ebp + xorl %edi,%ebp movdqa 64(%esp),%xmm3 movl %ecx,%esi roll $5,%ecx - xorl %edi,%ebp - addl %ecx,%ebx - rorl $7,%edx - movdqa %xmm1,%xmm4 addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + pshufd $238,%xmm0,%xmm4 + addl %ecx,%ebx addl 32(%esp),%eax pxor %xmm6,%xmm2 -.byte 102,15,58,15,224,8 - xorl %edi,%esi + punpcklqdq %xmm1,%xmm4 + xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx pxor %xmm3,%xmm2 movdqa %xmm6,64(%esp) - xorl %edx,%esi - addl %ebx,%eax + addl %esi,%eax + xorl %edx,%ebp movdqa 128(%esp),%xmm6 - paddd %xmm1,%xmm5 rorl $7,%ecx - addl %esi,%eax + paddd %xmm1,%xmm5 + addl %ebx,%eax pxor %xmm4,%xmm2 addl 36(%esp),%edi - xorl %edx,%ebp + xorl %ecx,%ebp movl %eax,%esi roll $5,%eax movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) - xorl %ecx,%ebp - addl %eax,%edi - rorl $7,%ebx addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi pslld $2,%xmm2 addl 40(%esp),%edx - xorl %ecx,%esi + xorl %ebx,%esi psrld $30,%xmm4 movl %edi,%ebp roll $5,%edi - xorl %ebx,%esi - addl %edi,%edx - rorl $7,%eax addl %esi,%edx + xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx por %xmm4,%xmm2 addl 44(%esp),%ecx - xorl %ebx,%ebp + xorl %eax,%ebp movdqa 80(%esp),%xmm4 movl %edx,%esi roll $5,%edx - xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edi - movdqa %xmm2,%xmm5 addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + pshufd $238,%xmm1,%xmm5 + addl %edx,%ecx addl 48(%esp),%ebx pxor %xmm7,%xmm3 -.byte 102,15,58,15,233,8 - xorl %eax,%esi + punpcklqdq %xmm2,%xmm5 + xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx pxor %xmm4,%xmm3 movdqa %xmm7,80(%esp) - xorl %edi,%esi - addl %ecx,%ebx + addl %esi,%ebx + xorl %edi,%ebp movdqa %xmm6,%xmm7 - paddd %xmm2,%xmm6 rorl $7,%edx - addl %esi,%ebx + paddd %xmm2,%xmm6 + addl %ecx,%ebx pxor %xmm5,%xmm3 addl 52(%esp),%eax - xorl %edi,%ebp + xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) - xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax pslld $2,%xmm3 addl 56(%esp),%edi - xorl %edx,%esi + xorl %ecx,%esi psrld $30,%xmm5 movl %eax,%ebp roll $5,%eax - xorl %ecx,%esi - addl %eax,%edi - rorl $7,%ebx addl %esi,%edi + xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi por %xmm5,%xmm3 addl 60(%esp),%edx - xorl %ecx,%ebp + xorl %ebx,%ebp movdqa 96(%esp),%xmm5 movl %edi,%esi roll $5,%edi - xorl %ebx,%ebp - addl %edi,%edx - rorl $7,%eax - movdqa %xmm3,%xmm6 addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + pshufd $238,%xmm2,%xmm6 + addl %edi,%edx addl (%esp),%ecx pxor %xmm0,%xmm4 -.byte 102,15,58,15,242,8 - xorl %ebx,%esi + punpcklqdq %xmm3,%xmm6 + xorl %eax,%esi movl %edx,%ebp roll $5,%edx pxor %xmm5,%xmm4 movdqa %xmm0,96(%esp) - xorl %eax,%esi - addl %edx,%ecx + addl %esi,%ecx + xorl %eax,%ebp movdqa %xmm7,%xmm0 - paddd %xmm3,%xmm7 rorl $7,%edi - addl %esi,%ecx + paddd %xmm3,%xmm7 + addl %edx,%ecx pxor %xmm6,%xmm4 addl 4(%esp),%ebx - xorl %eax,%ebp + xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx movdqa %xmm4,%xmm6 movdqa %xmm7,48(%esp) - xorl %edi,%ebp - addl %ecx,%ebx - rorl $7,%edx addl %ebp,%ebx + xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx pslld $2,%xmm4 addl 8(%esp),%eax - xorl %edi,%esi + xorl %edx,%esi psrld $30,%xmm6 movl %ebx,%ebp roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax + xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax por %xmm6,%xmm4 addl 12(%esp),%edi - xorl %edx,%ebp + xorl %ecx,%ebp movdqa 64(%esp),%xmm6 movl %eax,%esi roll $5,%eax - xorl %ecx,%ebp - addl %eax,%edi - rorl $7,%ebx - movdqa %xmm4,%xmm7 addl %ebp,%edi + xorl %ecx,%esi + rorl $7,%ebx + pshufd $238,%xmm3,%xmm7 + addl %eax,%edi addl 16(%esp),%edx pxor %xmm1,%xmm5 -.byte 102,15,58,15,251,8 - xorl %ecx,%esi + punpcklqdq %xmm4,%xmm7 + xorl %ebx,%esi movl %edi,%ebp roll $5,%edi pxor %xmm6,%xmm5 movdqa %xmm1,64(%esp) - xorl %ebx,%esi - addl %edi,%edx + addl %esi,%edx + xorl %ebx,%ebp movdqa %xmm0,%xmm1 - paddd %xmm4,%xmm0 rorl $7,%eax - addl %esi,%edx + paddd %xmm4,%xmm0 + addl %edi,%edx pxor %xmm7,%xmm5 addl 20(%esp),%ecx - xorl %ebx,%ebp + xorl %eax,%ebp movl %edx,%esi roll $5,%edx movdqa %xmm5,%xmm7 movdqa %xmm0,(%esp) - xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edi addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx pslld $2,%xmm5 addl 24(%esp),%ebx - xorl %eax,%esi + xorl %edi,%esi psrld $30,%xmm7 movl %ecx,%ebp roll $5,%ecx - xorl %edi,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx + xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx por %xmm7,%xmm5 addl 28(%esp),%eax - xorl %edi,%ebp movdqa 80(%esp),%xmm7 + rorl $7,%ecx movl %ebx,%esi - roll $5,%ebx xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx - movdqa %xmm5,%xmm0 + roll $5,%ebx + pshufd $238,%xmm4,%xmm0 addl %ebp,%eax - movl %ecx,%ebp - pxor %xmm2,%xmm6 -.byte 102,15,58,15,196,8 + xorl %ecx,%esi xorl %edx,%ecx + addl %ebx,%eax addl 32(%esp),%edi - andl %edx,%ebp - pxor %xmm7,%xmm6 - movdqa %xmm2,80(%esp) + pxor %xmm2,%xmm6 + punpcklqdq %xmm5,%xmm0 andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - movdqa %xmm1,%xmm2 - paddd %xmm5,%xmm1 - addl %ebp,%edi + pxor %xmm7,%xmm6 + movdqa %xmm2,80(%esp) movl %eax,%ebp - pxor %xmm0,%xmm6 + xorl %ecx,%esi roll $5,%eax + movdqa %xmm1,%xmm2 addl %esi,%edi - xorl %edx,%ecx + paddd %xmm5,%xmm1 + xorl %ebx,%ebp + pxor %xmm0,%xmm6 + xorl %ecx,%ebx addl %eax,%edi + addl 36(%esp),%edx + andl %ebx,%ebp movdqa %xmm6,%xmm0 movdqa %xmm1,16(%esp) - movl %ebx,%esi xorl %ecx,%ebx - addl 36(%esp),%edx - andl %ecx,%esi - pslld $2,%xmm6 - andl %ebx,%ebp rorl $7,%eax - psrld $30,%xmm0 - addl %esi,%edx movl %edi,%esi + xorl %ebx,%ebp roll $5,%edi + pslld $2,%xmm6 addl %ebp,%edx - xorl %ecx,%ebx - addl %edi,%edx - por %xmm0,%xmm6 - movl %eax,%ebp + xorl %eax,%esi + psrld $30,%xmm0 xorl %ebx,%eax - movdqa 96(%esp),%xmm0 + addl %edi,%edx addl 40(%esp),%ecx - andl %ebx,%ebp andl %eax,%esi + xorl %ebx,%eax rorl $7,%edi - addl %ebp,%ecx - movdqa %xmm6,%xmm1 + por %xmm0,%xmm6 movl %edx,%ebp + xorl %eax,%esi + movdqa 96(%esp),%xmm0 roll $5,%edx addl %esi,%ecx - xorl %ebx,%eax - addl %edx,%ecx - movl %edi,%esi + xorl %edi,%ebp xorl %eax,%edi + addl %edx,%ecx + pshufd $238,%xmm5,%xmm1 addl 44(%esp),%ebx - andl %eax,%esi andl %edi,%ebp + xorl %eax,%edi rorl $7,%edx - addl %esi,%ebx movl %ecx,%esi + xorl %edi,%ebp roll $5,%ecx addl %ebp,%ebx - xorl %eax,%edi + xorl %edx,%esi + xorl %edi,%edx addl %ecx,%ebx - movl %edx,%ebp + addl 48(%esp),%eax pxor %xmm3,%xmm7 -.byte 102,15,58,15,205,8 + punpcklqdq %xmm6,%xmm1 + andl %edx,%esi xorl %edi,%edx - addl 48(%esp),%eax - andl %edi,%ebp + rorl $7,%ecx pxor %xmm0,%xmm7 movdqa %xmm3,96(%esp) - andl %edx,%esi - rorl $7,%ecx - movdqa 144(%esp),%xmm3 - paddd %xmm6,%xmm2 - addl %ebp,%eax movl %ebx,%ebp - pxor %xmm1,%xmm7 + xorl %edx,%esi roll $5,%ebx + movdqa 144(%esp),%xmm3 addl %esi,%eax - xorl %edi,%edx + paddd %xmm6,%xmm2 + xorl %ecx,%ebp + pxor %xmm1,%xmm7 + xorl %edx,%ecx addl %ebx,%eax + addl 52(%esp),%edi + andl %ecx,%ebp movdqa %xmm7,%xmm1 movdqa %xmm2,32(%esp) - movl %ecx,%esi xorl %edx,%ecx - addl 52(%esp),%edi - andl %edx,%esi - pslld $2,%xmm7 - andl %ecx,%ebp rorl $7,%ebx - psrld $30,%xmm1 - addl %esi,%edi movl %eax,%esi + xorl %ecx,%ebp roll $5,%eax + pslld $2,%xmm7 addl %ebp,%edi - xorl %edx,%ecx - addl %eax,%edi - por %xmm1,%xmm7 - movl %ebx,%ebp + xorl %ebx,%esi + psrld $30,%xmm1 xorl %ecx,%ebx - movdqa 64(%esp),%xmm1 + addl %eax,%edi addl 56(%esp),%edx - andl %ecx,%ebp andl %ebx,%esi + xorl %ecx,%ebx rorl $7,%eax - addl %ebp,%edx - movdqa %xmm7,%xmm2 + por %xmm1,%xmm7 movl %edi,%ebp + xorl %ebx,%esi + movdqa 64(%esp),%xmm1 roll $5,%edi addl %esi,%edx - xorl %ecx,%ebx - addl %edi,%edx - movl %eax,%esi + xorl %eax,%ebp xorl %ebx,%eax + addl %edi,%edx + pshufd $238,%xmm6,%xmm2 addl 60(%esp),%ecx - andl %ebx,%esi andl %eax,%ebp + xorl %ebx,%eax rorl $7,%edi - addl %esi,%ecx movl %edx,%esi + xorl %eax,%ebp roll $5,%edx addl %ebp,%ecx - xorl %ebx,%eax + xorl %edi,%esi + xorl %eax,%edi addl %edx,%ecx - movl %edi,%ebp + addl (%esp),%ebx pxor %xmm4,%xmm0 -.byte 102,15,58,15,214,8 + punpcklqdq %xmm7,%xmm2 + andl %edi,%esi xorl %eax,%edi - addl (%esp),%ebx - andl %eax,%ebp + rorl $7,%edx pxor %xmm1,%xmm0 movdqa %xmm4,64(%esp) - andl %edi,%esi - rorl $7,%edx - movdqa %xmm3,%xmm4 - paddd %xmm7,%xmm3 - addl %ebp,%ebx movl %ecx,%ebp - pxor %xmm2,%xmm0 + xorl %edi,%esi roll $5,%ecx + movdqa %xmm3,%xmm4 addl %esi,%ebx - xorl %eax,%edi + paddd %xmm7,%xmm3 + xorl %edx,%ebp + pxor %xmm2,%xmm0 + xorl %edi,%edx addl %ecx,%ebx + addl 4(%esp),%eax + andl %edx,%ebp movdqa %xmm0,%xmm2 movdqa %xmm3,48(%esp) - movl %edx,%esi xorl %edi,%edx - addl 4(%esp),%eax - andl %edi,%esi - pslld $2,%xmm0 - andl %edx,%ebp rorl $7,%ecx - psrld $30,%xmm2 - addl %esi,%eax movl %ebx,%esi + xorl %edx,%ebp roll $5,%ebx + pslld $2,%xmm0 addl %ebp,%eax - xorl %edi,%edx - addl %ebx,%eax - por %xmm2,%xmm0 - movl %ecx,%ebp + xorl %ecx,%esi + psrld $30,%xmm2 xorl %edx,%ecx - movdqa 80(%esp),%xmm2 + addl %ebx,%eax addl 8(%esp),%edi - andl %edx,%ebp andl %ecx,%esi + xorl %edx,%ecx rorl $7,%ebx - addl %ebp,%edi - movdqa %xmm0,%xmm3 + por %xmm2,%xmm0 movl %eax,%ebp + xorl %ecx,%esi + movdqa 80(%esp),%xmm2 roll $5,%eax addl %esi,%edi - xorl %edx,%ecx - addl %eax,%edi - movl %ebx,%esi + xorl %ebx,%ebp xorl %ecx,%ebx + addl %eax,%edi + pshufd $238,%xmm7,%xmm3 addl 12(%esp),%edx - andl %ecx,%esi andl %ebx,%ebp + xorl %ecx,%ebx rorl $7,%eax - addl %esi,%edx movl %edi,%esi + xorl %ebx,%ebp roll $5,%edi addl %ebp,%edx - xorl %ecx,%ebx + xorl %eax,%esi + xorl %ebx,%eax addl %edi,%edx - movl %eax,%ebp + addl 16(%esp),%ecx pxor %xmm5,%xmm1 -.byte 102,15,58,15,223,8 + punpcklqdq %xmm0,%xmm3 + andl %eax,%esi xorl %ebx,%eax - addl 16(%esp),%ecx - andl %ebx,%ebp + rorl $7,%edi pxor %xmm2,%xmm1 movdqa %xmm5,80(%esp) - andl %eax,%esi - rorl $7,%edi - movdqa %xmm4,%xmm5 - paddd %xmm0,%xmm4 - addl %ebp,%ecx movl %edx,%ebp - pxor %xmm3,%xmm1 + xorl %eax,%esi roll $5,%edx + movdqa %xmm4,%xmm5 addl %esi,%ecx - xorl %ebx,%eax + paddd %xmm0,%xmm4 + xorl %edi,%ebp + pxor %xmm3,%xmm1 + xorl %eax,%edi addl %edx,%ecx + addl 20(%esp),%ebx + andl %edi,%ebp movdqa %xmm1,%xmm3 movdqa %xmm4,(%esp) - movl %edi,%esi xorl %eax,%edi - addl 20(%esp),%ebx - andl %eax,%esi - pslld $2,%xmm1 - andl %edi,%ebp rorl $7,%edx - psrld $30,%xmm3 - addl %esi,%ebx movl %ecx,%esi + xorl %edi,%ebp roll $5,%ecx + pslld $2,%xmm1 addl %ebp,%ebx - xorl %eax,%edi - addl %ecx,%ebx - por %xmm3,%xmm1 - movl %edx,%ebp + xorl %edx,%esi + psrld $30,%xmm3 xorl %edi,%edx - movdqa 96(%esp),%xmm3 + addl %ecx,%ebx addl 24(%esp),%eax - andl %edi,%ebp andl %edx,%esi + xorl %edi,%edx rorl $7,%ecx - addl %ebp,%eax - movdqa %xmm1,%xmm4 + por %xmm3,%xmm1 movl %ebx,%ebp + xorl %edx,%esi + movdqa 96(%esp),%xmm3 roll $5,%ebx addl %esi,%eax - xorl %edi,%edx - addl %ebx,%eax - movl %ecx,%esi + xorl %ecx,%ebp xorl %edx,%ecx + addl %ebx,%eax + pshufd $238,%xmm0,%xmm4 addl 28(%esp),%edi - andl %edx,%esi andl %ecx,%ebp + xorl %edx,%ecx rorl $7,%ebx - addl %esi,%edi movl %eax,%esi + xorl %ecx,%ebp roll $5,%eax addl %ebp,%edi - xorl %edx,%ecx + xorl %ebx,%esi + xorl %ecx,%ebx addl %eax,%edi - movl %ebx,%ebp + addl 32(%esp),%edx pxor %xmm6,%xmm2 -.byte 102,15,58,15,224,8 + punpcklqdq %xmm1,%xmm4 + andl %ebx,%esi xorl %ecx,%ebx - addl 32(%esp),%edx - andl %ecx,%ebp + rorl $7,%eax pxor %xmm3,%xmm2 movdqa %xmm6,96(%esp) - andl %ebx,%esi - rorl $7,%eax - movdqa %xmm5,%xmm6 - paddd %xmm1,%xmm5 - addl %ebp,%edx movl %edi,%ebp - pxor %xmm4,%xmm2 + xorl %ebx,%esi roll $5,%edi + movdqa %xmm5,%xmm6 addl %esi,%edx - xorl %ecx,%ebx + paddd %xmm1,%xmm5 + xorl %eax,%ebp + pxor %xmm4,%xmm2 + xorl %ebx,%eax addl %edi,%edx + addl 36(%esp),%ecx + andl %eax,%ebp movdqa %xmm2,%xmm4 movdqa %xmm5,16(%esp) - movl %eax,%esi xorl %ebx,%eax - addl 36(%esp),%ecx - andl %ebx,%esi - pslld $2,%xmm2 - andl %eax,%ebp rorl $7,%edi - psrld $30,%xmm4 - addl %esi,%ecx movl %edx,%esi + xorl %eax,%ebp roll $5,%edx + pslld $2,%xmm2 addl %ebp,%ecx - xorl %ebx,%eax - addl %edx,%ecx - por %xmm4,%xmm2 - movl %edi,%ebp + xorl %edi,%esi + psrld $30,%xmm4 xorl %eax,%edi - movdqa 64(%esp),%xmm4 + addl %edx,%ecx addl 40(%esp),%ebx - andl %eax,%ebp andl %edi,%esi + xorl %eax,%edi rorl $7,%edx - addl %ebp,%ebx - movdqa %xmm2,%xmm5 + por %xmm4,%xmm2 movl %ecx,%ebp + xorl %edi,%esi + movdqa 64(%esp),%xmm4 roll $5,%ecx addl %esi,%ebx - xorl %eax,%edi - addl %ecx,%ebx - movl %edx,%esi + xorl %edx,%ebp xorl %edi,%edx + addl %ecx,%ebx + pshufd $238,%xmm1,%xmm5 addl 44(%esp),%eax - andl %edi,%esi andl %edx,%ebp + xorl %edi,%edx rorl $7,%ecx - addl %esi,%eax movl %ebx,%esi + xorl %edx,%ebp roll $5,%ebx addl %ebp,%eax - xorl %edi,%edx + xorl %edx,%esi addl %ebx,%eax addl 48(%esp),%edi pxor %xmm7,%xmm3 -.byte 102,15,58,15,233,8 - xorl %edx,%esi + punpcklqdq %xmm2,%xmm5 + xorl %ecx,%esi movl %eax,%ebp roll $5,%eax pxor %xmm4,%xmm3 movdqa %xmm7,64(%esp) - xorl %ecx,%esi - addl %eax,%edi + addl %esi,%edi + xorl %ecx,%ebp movdqa %xmm6,%xmm7 - paddd %xmm2,%xmm6 rorl $7,%ebx - addl %esi,%edi + paddd %xmm2,%xmm6 + addl %eax,%edi pxor %xmm5,%xmm3 addl 52(%esp),%edx - xorl %ecx,%ebp + xorl %ebx,%ebp movl %edi,%esi roll $5,%edi movdqa %xmm3,%xmm5 movdqa %xmm6,32(%esp) - xorl %ebx,%ebp - addl %edi,%edx - rorl $7,%eax addl %ebp,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx pslld $2,%xmm3 addl 56(%esp),%ecx - xorl %ebx,%esi + xorl %eax,%esi psrld $30,%xmm5 movl %edx,%ebp roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%edi addl %esi,%ecx + xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx por %xmm5,%xmm3 addl 60(%esp),%ebx - xorl %eax,%ebp + xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx - xorl %edi,%ebp - addl %ecx,%ebx - rorl $7,%edx addl %ebp,%ebx - addl (%esp),%eax - paddd %xmm3,%xmm7 xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl (%esp),%eax + xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx - xorl %edx,%esi - movdqa %xmm7,48(%esp) - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 4(%esp),%edi xorl %edx,%ebp + rorl $7,%ecx + paddd %xmm3,%xmm7 + addl %ebx,%eax + addl 4(%esp),%edi + xorl %ecx,%ebp movl %eax,%esi + movdqa %xmm7,48(%esp) roll $5,%eax - xorl %ecx,%ebp - addl %eax,%edi - rorl $7,%ebx addl %ebp,%edi - addl 8(%esp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 8(%esp),%edx + xorl %ebx,%esi movl %edi,%ebp roll $5,%edi - xorl %ebx,%esi - addl %edi,%edx - rorl $7,%eax addl %esi,%edx - addl 12(%esp),%ecx xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 12(%esp),%ecx + xorl %eax,%ebp movl %edx,%esi roll $5,%edx - xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edi addl %ebp,%ecx + xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx movl 196(%esp),%ebp cmpl 200(%esp),%ebp - je .L005done + je .L007done movdqa 160(%esp),%xmm7 movdqa 176(%esp),%xmm6 movdqu (%ebp),%xmm0 @@ -2388,113 +2542,112 @@ _sha1_block_data_order_ssse3: movl %ebp,196(%esp) movdqa %xmm7,96(%esp) addl 16(%esp),%ebx - xorl %eax,%esi -.byte 102,15,56,0,206 + xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx - paddd %xmm7,%xmm0 - xorl %edi,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - movdqa %xmm0,(%esp) - addl 20(%esp),%eax xorl %edi,%ebp - psubd %xmm7,%xmm0 + rorl $7,%edx +.byte 102,15,56,0,206 + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp movl %ebx,%esi + paddd %xmm7,%xmm0 roll $5,%ebx - xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax - addl 24(%esp),%edi xorl %edx,%esi + rorl $7,%ecx + movdqa %xmm0,(%esp) + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi movl %eax,%ebp + psubd %xmm7,%xmm0 roll $5,%eax - xorl %ecx,%esi - addl %eax,%edi - rorl $7,%ebx addl %esi,%edi - addl 28(%esp),%edx xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp movl %edi,%esi roll $5,%edi - xorl %ebx,%ebp - addl %edi,%edx - rorl $7,%eax addl %ebp,%edx - addl 32(%esp),%ecx xorl %ebx,%esi -.byte 102,15,56,0,214 + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi movl %edx,%ebp roll $5,%edx - paddd %xmm7,%xmm1 - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%edi addl %esi,%ecx - movdqa %xmm1,16(%esp) - addl 36(%esp),%ebx xorl %eax,%ebp - psubd %xmm7,%xmm1 + rorl $7,%edi +.byte 102,15,56,0,214 + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp movl %ecx,%esi + paddd %xmm7,%xmm1 roll $5,%ecx - xorl %edi,%ebp - addl %ecx,%ebx - rorl $7,%edx addl %ebp,%ebx - addl 40(%esp),%eax xorl %edi,%esi + rorl $7,%edx + movdqa %xmm1,16(%esp) + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi movl %ebx,%ebp + psubd %xmm7,%xmm1 roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%esp),%edi xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp movl %eax,%esi roll $5,%eax - xorl %ecx,%ebp - addl %eax,%edi - rorl $7,%ebx addl %ebp,%edi - addl 48(%esp),%edx xorl %ecx,%esi -.byte 102,15,56,0,222 + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi movl %edi,%ebp roll $5,%edi - paddd %xmm7,%xmm2 - xorl %ebx,%esi - addl %edi,%edx - rorl $7,%eax addl %esi,%edx - movdqa %xmm2,32(%esp) - addl 52(%esp),%ecx xorl %ebx,%ebp - psubd %xmm7,%xmm2 + rorl $7,%eax +.byte 102,15,56,0,222 + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp movl %edx,%esi + paddd %xmm7,%xmm2 roll $5,%edx - xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edi addl %ebp,%ecx - addl 56(%esp),%ebx xorl %eax,%esi + rorl $7,%edi + movdqa %xmm2,32(%esp) + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi movl %ecx,%ebp + psubd %xmm7,%xmm2 roll $5,%ecx - xorl %edi,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%esp),%eax xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx - xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax addl 4(%ebp),%esi @@ -2504,109 +2657,112 @@ _sha1_block_data_order_ssse3: movl %esi,4(%ebp) addl 16(%ebp),%edi movl %ecx,8(%ebp) - movl %esi,%ebx + movl %ecx,%ebx movl %edx,12(%ebp) + xorl %edx,%ebx movl %edi,16(%ebp) - movdqa %xmm1,%xmm4 - jmp .L004loop + movl %esi,%ebp + pshufd $238,%xmm0,%xmm4 + andl %ebx,%esi + movl %ebp,%ebx + jmp .L006loop .align 16 -.L005done: +.L007done: addl 16(%esp),%ebx - xorl %eax,%esi + xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx - xorl %edi,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 20(%esp),%eax xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 20(%esp),%eax + xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx - xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax - addl 24(%esp),%edi xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%esp),%edi + xorl %ecx,%esi movl %eax,%ebp roll $5,%eax - xorl %ecx,%esi - addl %eax,%edi - rorl $7,%ebx addl %esi,%edi - addl 28(%esp),%edx xorl %ecx,%ebp + rorl $7,%ebx + addl %eax,%edi + addl 28(%esp),%edx + xorl %ebx,%ebp movl %edi,%esi roll $5,%edi - xorl %ebx,%ebp - addl %edi,%edx - rorl $7,%eax addl %ebp,%edx - addl 32(%esp),%ecx xorl %ebx,%esi + rorl $7,%eax + addl %edi,%edx + addl 32(%esp),%ecx + xorl %eax,%esi movl %edx,%ebp roll $5,%edx - xorl %eax,%esi - addl %edx,%ecx - rorl $7,%edi addl %esi,%ecx - addl 36(%esp),%ebx xorl %eax,%ebp + rorl $7,%edi + addl %edx,%ecx + addl 36(%esp),%ebx + xorl %edi,%ebp movl %ecx,%esi roll $5,%ecx - xorl %edi,%ebp - addl %ecx,%ebx - rorl $7,%edx addl %ebp,%ebx - addl 40(%esp),%eax xorl %edi,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%esp),%eax + xorl %edx,%esi movl %ebx,%ebp roll $5,%ebx - xorl %edx,%esi - addl %ebx,%eax - rorl $7,%ecx addl %esi,%eax - addl 44(%esp),%edi xorl %edx,%ebp + rorl $7,%ecx + addl %ebx,%eax + addl 44(%esp),%edi + xorl %ecx,%ebp movl %eax,%esi roll $5,%eax - xorl %ecx,%ebp - addl %eax,%edi - rorl $7,%ebx addl %ebp,%edi - addl 48(%esp),%edx xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%edi + addl 48(%esp),%edx + xorl %ebx,%esi movl %edi,%ebp roll $5,%edi - xorl %ebx,%esi - addl %edi,%edx - rorl $7,%eax addl %esi,%edx - addl 52(%esp),%ecx xorl %ebx,%ebp + rorl $7,%eax + addl %edi,%edx + addl 52(%esp),%ecx + xorl %eax,%ebp movl %edx,%esi roll $5,%edx - xorl %eax,%ebp - addl %edx,%ecx - rorl $7,%edi addl %ebp,%ecx - addl 56(%esp),%ebx xorl %eax,%esi + rorl $7,%edi + addl %edx,%ecx + addl 56(%esp),%ebx + xorl %edi,%esi movl %ecx,%ebp roll $5,%ecx - xorl %edi,%esi - addl %ecx,%ebx - rorl $7,%edx addl %esi,%ebx - addl 60(%esp),%eax xorl %edi,%ebp + rorl $7,%edx + addl %ecx,%ebx + addl 60(%esp),%eax + xorl %edx,%ebp movl %ebx,%esi roll $5,%ebx - xorl %edx,%ebp - addl %ebx,%eax - rorl $7,%ecx addl %ebp,%eax + rorl $7,%ecx + addl %ebx,%eax movl 192(%esp),%ebp addl (%ebp),%eax movl 204(%esp),%esp @@ -2632,8 +2788,9 @@ _sha1_block_data_order_ssse3: .long 2400959708,2400959708,2400959708,2400959708 .long 3395469782,3395469782,3395469782,3395469782 .long 66051,67438087,134810123,202182159 +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 .byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 .byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 .byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/sha256-586.s b/secure/lib/libcrypto/i386/sha256-586.s index 7ea3748..a3b82f8 100644 --- a/secure/lib/libcrypto/i386/sha256-586.s +++ b/secure/lib/libcrypto/i386/sha256-586.s @@ -26,234 +26,4553 @@ sha256_block_data_order: movl %edi,4(%esp) movl %eax,8(%esp) movl %ebx,12(%esp) + leal OPENSSL_ia32cap_P,%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + movl 8(%edx),%edx + testl $16777216,%ecx + jz .L003no_xmm + andl $1073741824,%ecx + andl $268435968,%ebx + testl $536870912,%edx + jnz .L004shaext + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + testl $512,%ebx + jnz .L005SSSE3 +.L003no_xmm: + subl %edi,%eax + cmpl $256,%eax + jae .L006unrolled + jmp .L002loop .align 16 .L002loop: movl (%edi),%eax movl 4(%edi),%ebx movl 8(%edi),%ecx - movl 12(%edi),%edx bswap %eax + movl 12(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 16(%edi),%eax movl 20(%edi),%ebx movl 24(%edi),%ecx - movl 28(%edi),%edx bswap %eax + movl 28(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 32(%edi),%eax movl 36(%edi),%ebx movl 40(%edi),%ecx - movl 44(%edi),%edx bswap %eax + movl 44(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx movl 48(%edi),%eax movl 52(%edi),%ebx movl 56(%edi),%ecx - movl 60(%edi),%edx bswap %eax + movl 60(%edi),%edx bswap %ebx - bswap %ecx - bswap %edx pushl %eax + bswap %ecx pushl %ebx + bswap %edx pushl %ecx pushl %edx addl $64,%edi - subl $32,%esp - movl %edi,100(%esp) + leal -36(%esp),%esp + movl %edi,104(%esp) movl (%esi),%eax movl 4(%esi),%ebx movl 8(%esi),%ecx movl 12(%esi),%edi - movl %ebx,4(%esp) - movl %ecx,8(%esp) - movl %edi,12(%esp) + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) movl 16(%esi),%edx movl 20(%esi),%ebx movl 24(%esi),%ecx movl 28(%esi),%edi - movl %ebx,20(%esp) - movl %ecx,24(%esp) - movl %edi,28(%esp) + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) .align 16 -.L00300_15: - movl 92(%esp),%ebx +.L00700_15: movl %edx,%ecx + movl 24(%esp),%esi rorl $14,%ecx - movl 20(%esp),%esi + movl 28(%esp),%edi xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx rorl $5,%ecx - xorl %edx,%ecx - rorl $6,%ecx - movl 24(%esp),%edi - addl %ecx,%ebx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %edx,16(%esp) + rorl $6,%edx movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00700_15 + movl 156(%esp),%ecx + jmp .L00816_63 +.align 16 +.L00816_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx andl %edx,%esi - movl 12(%esp),%edx + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx xorl %edi,%esi - movl %eax,%edi + rorl $6,%edx + movl %eax,%ecx addl %esi,%ebx rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00816_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L006unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L009grand_loop +.align 16 +.L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp rorl $2,%ecx - addl %ebx,%edx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx movl 8(%esp),%edi - addl %ecx,%ebx + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx movl %eax,%ecx - subl $4,%esp - orl %esi,%eax + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax - addl $4,%ebp - addl %ebx,%eax - addl %esi,%edx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx addl %esi,%eax - cmpl $3248222580,%esi - jne .L00300_15 - movl 152(%esp),%ebx -.align 16 -.L00416_63: - movl %ebx,%esi - movl 100(%esp),%ecx + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx rorl $11,%esi movl %ecx,%edi + rorl $2,%ecx xorl %ebx,%esi + shrl $3,%ebx rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi shrl $3,%ebx - rorl $2,%edi + rorl $7,%esi + xorl %edi,%ecx xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi xorl %ecx,%edi - rorl $17,%edi - shrl $10,%ecx - addl 156(%esp),%ebx + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx xorl %ecx,%edi - addl 120(%esp),%ebx + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx addl %edi,%ebx - rorl $14,%ecx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi movl 20(%esp),%esi - xorl %edx,%ecx - rorl $5,%ecx + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx movl %ebx,92(%esp) - xorl %edx,%ecx - rorl $6,%ecx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx movl 24(%esp),%edi - addl %ecx,%ebx + xorl %ecx,%edx + movl %ebx,32(%esp) xorl %edi,%esi - movl %edx,16(%esp) + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx movl %eax,%ecx - andl %edx,%esi - movl 12(%esp),%edx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx xorl %edi,%esi - movl %eax,%edi - addl %esi,%ebx + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi rorl $2,%ecx - addl %ebx,%edx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx movl 8(%esp),%edi - addl %ecx,%ebx + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx movl %eax,%ecx - subl $4,%esp - orl %esi,%eax + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx andl %esi,%ecx - andl %edi,%eax - movl (%ebp),%esi - orl %ecx,%eax - addl $4,%ebp - addl %ebx,%eax - movl 152(%esp),%ebx - addl %esi,%edx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx addl %esi,%eax - cmpl $3329325298,%esi - jne .L00416_63 - movl 352(%esp),%esi - movl 4(%esp),%ebx + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx addl (%esi),%eax - addl 4(%esi),%ebx - addl 8(%esi),%ecx - addl 12(%esi),%edi + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx movl %eax,(%esi) - movl %ebx,4(%esi) - movl %ecx,8(%esi) - movl %edi,12(%esi) - movl 20(%esp),%eax + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi movl 24(%esp),%ebx movl 28(%esp),%ecx - movl 356(%esp),%edi addl 16(%esi),%edx - addl 20(%esi),%eax + addl 20(%esi),%edi addl 24(%esi),%ebx addl 28(%esi),%ecx movl %edx,16(%esi) - movl %eax,20(%esi) + movl %edi,20(%esi) movl %ebx,24(%esi) movl %ecx,28(%esi) - addl $352,%esp - subl $256,%ebp - cmpl 8(%esp),%edi - jb .L002loop - movl 12(%esp),%esp + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L004shaext: + subl $32,%esp + movdqu (%esi),%xmm1 + leal 128(%ebp),%ebp + movdqu 16(%esi),%xmm2 + movdqa 128(%ebp),%xmm7 + pshufd $27,%xmm1,%xmm0 + pshufd $177,%xmm1,%xmm1 + pshufd $27,%xmm2,%xmm2 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .L010loop_shaext +.align 16 +.L010loop_shaext: + movdqu (%edi),%xmm3 + movdqu 16(%edi),%xmm4 + movdqu 32(%edi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%edi),%xmm6 + movdqa %xmm2,16(%esp) + movdqa -128(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + nop + movdqa %xmm1,(%esp) +.byte 15,56,203,202 + movdqa -112(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + leal 64(%edi),%edi +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -80(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa -64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa -48(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa -32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa -16(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa (%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 16(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 32(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 48(%ebp),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 64(%ebp),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 80(%ebp),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + movdqa 96(%ebp),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa 128(%ebp),%xmm7 +.byte 15,56,203,202 + movdqa 112(%ebp),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $14,%xmm0,%xmm0 + cmpl %edi,%eax + nop +.byte 15,56,203,202 + paddd 16(%esp),%xmm2 + paddd (%esp),%xmm1 + jnz .L010loop_shaext + pshufd $177,%xmm2,%xmm2 + pshufd $27,%xmm1,%xmm7 + pshufd $177,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + movl 44(%esp),%esp + movdqu %xmm1,(%esi) + movdqu %xmm2,16(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L005SSSE3: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,4(%esp) + xorl %ecx,%ebx + movl %ecx,8(%esp) + movl %edi,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%edi + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ecx,24(%esp) + movl %esi,28(%esp) + movdqa 256(%ebp),%xmm7 + jmp .L011grand_ssse3 +.align 16 +.L011grand_ssse3: + movdqu (%edi),%xmm0 + movdqu 16(%edi),%xmm1 + movdqu 32(%edi),%xmm2 + movdqu 48(%edi),%xmm3 + addl $64,%edi +.byte 102,15,56,0,199 + movl %edi,100(%esp) +.byte 102,15,56,0,207 + movdqa (%ebp),%xmm4 +.byte 102,15,56,0,215 + movdqa 16(%ebp),%xmm5 + paddd %xmm0,%xmm4 +.byte 102,15,56,0,223 + movdqa 32(%ebp),%xmm6 + paddd %xmm1,%xmm5 + movdqa 48(%ebp),%xmm7 + movdqa %xmm4,32(%esp) + paddd %xmm2,%xmm6 + movdqa %xmm5,48(%esp) + paddd %xmm3,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + jmp .L012ssse3_00_47 +.align 16 +.L012ssse3_00_47: + addl $64,%ebp + movl %edx,%ecx + movdqa %xmm1,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm3,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,224,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,250,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm3,%xmm7 + xorl %esi,%ecx + addl 32(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm0 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm0 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm0,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa (%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm0 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm0,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,32(%esp) + movl %edx,%ecx + movdqa %xmm2,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm0,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,225,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,251,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm0,%xmm7 + xorl %esi,%ecx + addl 48(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm1 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm1 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm1,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 16(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm1 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm1,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,48(%esp) + movl %edx,%ecx + movdqa %xmm3,%xmm4 + rorl $14,%edx + movl 20(%esp),%esi + movdqa %xmm1,%xmm7 + xorl %ecx,%edx + movl 24(%esp),%edi +.byte 102,15,58,15,226,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,248,4 + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 4(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %eax,(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm1,%xmm7 + xorl %esi,%ecx + addl 64(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 12(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl 16(%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,12(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm2 + movl %ebx,28(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 8(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm2 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + pshufd $80,%xmm2,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 4(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 32(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,4(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm2 + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + paddd %xmm2,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movdqa %xmm6,64(%esp) + movl %edx,%ecx + movdqa %xmm0,%xmm4 + rorl $14,%edx + movl 4(%esp),%esi + movdqa %xmm2,%xmm7 + xorl %ecx,%edx + movl 8(%esp),%edi +.byte 102,15,58,15,227,4 + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi +.byte 102,15,58,15,249,4 + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + movdqa %xmm4,%xmm5 + rorl $6,%edx + movl %eax,%ecx + movdqa %xmm4,%xmm6 + addl %edi,%edx + movl 20(%esp),%edi + psrld $3,%xmm4 + movl %eax,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %eax,16(%esp) + xorl %eax,%ecx + psrld $7,%xmm6 + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + pshufd $250,%xmm2,%xmm7 + xorl %esi,%ecx + addl 80(%esp),%edx + pslld $14,%xmm5 + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm4 + addl %edx,%ebx + addl 28(%esp),%edx + psrld $11,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm5,%xmm4 + movl (%esp),%esi + xorl %ecx,%edx + pslld $11,%xmm5 + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + pxor %xmm6,%xmm4 + andl %ecx,%esi + movl %ecx,28(%esp) + movdqa %xmm7,%xmm6 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + pxor %xmm5,%xmm4 + movl %ebx,%ecx + addl %edi,%edx + psrld $10,%xmm7 + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm4,%xmm3 + movl %ebx,12(%esp) + xorl %ebx,%ecx + psrlq $17,%xmm6 + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + pxor %xmm6,%xmm7 + andl %ebx,%eax + xorl %esi,%ecx + psrlq $2,%xmm6 + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%eax + addl 24(%esp),%edx + pshufd $128,%xmm7,%xmm7 + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + psrldq $8,%xmm7 + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + paddd %xmm7,%xmm3 + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + pshufd $80,%xmm3,%xmm7 + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + movdqa %xmm7,%xmm6 + rorl $11,%ecx + psrld $10,%xmm7 + andl %eax,%ebx + psrlq $17,%xmm6 + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + pxor %xmm6,%xmm7 + addl %edx,%ebx + addl 20(%esp),%edx + psrlq $2,%xmm6 + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + pxor %xmm6,%xmm7 + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + pshufd $8,%xmm7,%xmm7 + xorl %edi,%esi + rorl $5,%edx + movdqa 48(%ebp),%xmm6 + andl %ecx,%esi + movl %ecx,20(%esp) + pslldq $8,%xmm7 + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + paddd %xmm7,%xmm3 + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + paddd %xmm3,%xmm6 + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movdqa %xmm6,80(%esp) + cmpl $66051,64(%ebp) + jne .L012ssse3_00_47 + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 32(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 36(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 40(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 44(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 48(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 52(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 56(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 60(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 20(%esp),%esi + xorl %ecx,%edx + movl 24(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 4(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 28(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 64(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 12(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 16(%esp),%esi + xorl %ecx,%edx + movl 20(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,12(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl (%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,28(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 24(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 68(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 8(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 12(%esp),%esi + xorl %ecx,%edx + movl 16(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 28(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,24(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 20(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 72(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 4(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 8(%esp),%esi + xorl %ecx,%edx + movl 12(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,4(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 24(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,20(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 16(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 76(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl (%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 4(%esp),%esi + xorl %ecx,%edx + movl 8(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 20(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,16(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 12(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 80(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 28(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl (%esp),%esi + xorl %ecx,%edx + movl 4(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,28(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 16(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,12(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl 8(%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 84(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 24(%esp),%edx + addl %ecx,%eax + movl %edx,%ecx + rorl $14,%edx + movl 28(%esp),%esi + xorl %ecx,%edx + movl (%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%edx + movl 12(%esp),%edi + movl %eax,%esi + rorl $9,%ecx + movl %eax,8(%esp) + xorl %eax,%ecx + xorl %edi,%eax + addl 4(%esp),%edx + rorl $11,%ecx + andl %eax,%ebx + xorl %esi,%ecx + addl 88(%esp),%edx + xorl %edi,%ebx + rorl $2,%ecx + addl %edx,%ebx + addl 20(%esp),%edx + addl %ecx,%ebx + movl %edx,%ecx + rorl $14,%edx + movl 24(%esp),%esi + xorl %ecx,%edx + movl 28(%esp),%edi + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,20(%esp) + xorl %ecx,%edx + xorl %esi,%edi + rorl $6,%edx + movl %ebx,%ecx + addl %edi,%edx + movl 8(%esp),%edi + movl %ebx,%esi + rorl $9,%ecx + movl %ebx,4(%esp) + xorl %ebx,%ecx + xorl %edi,%ebx + addl (%esp),%edx + rorl $11,%ecx + andl %ebx,%eax + xorl %esi,%ecx + addl 92(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %edx,%eax + addl 16(%esp),%edx + addl %ecx,%eax + movl 96(%esp),%esi + xorl %edi,%ebx + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebx,4(%esp) + xorl %edi,%ebx + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %edi,20(%esp) + movl 28(%esp),%edi + movl %ecx,24(%esi) + addl 28(%esi),%edi + movl %ecx,24(%esp) + movl %edi,28(%esi) + movl %edi,28(%esp) + movl 100(%esp),%edi + movdqa 64(%ebp),%xmm7 + subl $192,%ebp + cmpl 104(%esp),%edi + jb .L011grand_ssse3 + movl 108(%esp),%esp popl %edi popl %esi popl %ebx popl %ebp ret -.align 64 -.L001K256: -.long 1116352408,1899447441,3049323471,3921009573 -.long 961987163,1508970993,2453635748,2870763221 -.long 3624381080,310598401,607225278,1426881987 -.long 1925078388,2162078206,2614888103,3248222580 -.long 3835390401,4022224774,264347078,604807628 -.long 770255983,1249150122,1555081692,1996064986 -.long 2554220882,2821834349,2952996808,3210313671 -.long 3336571891,3584528711,113926993,338241895 -.long 666307205,773529912,1294757372,1396182291 -.long 1695183700,1986661051,2177026350,2456956037 -.long 2730485921,2820302411,3259730800,3345764771 -.long 3516065817,3600352804,4094571909,275423344 -.long 430227734,506948616,659060556,883997877 -.long 958139571,1322822218,1537002063,1747873779 -.long 1955562222,2024104815,2227730452,2361852424 -.long 2428436474,2756734187,3204031479,3329325298 .size sha256_block_data_order,.-.L_sha256_block_data_order_begin -.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 -.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 -.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 -.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 -.byte 62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/sha512-586.s b/secure/lib/libcrypto/i386/sha512-586.s index a37f850..2dc6f1a 100644 --- a/secure/lib/libcrypto/i386/sha512-586.s +++ b/secure/lib/libcrypto/i386/sha512-586.s @@ -27,249 +27,2243 @@ sha512_block_data_order: movl %eax,8(%esp) movl %ebx,12(%esp) leal OPENSSL_ia32cap_P,%edx - btl $26,(%edx) - jnc .L002loop_x86 + movl (%edx),%ecx + testl $67108864,%ecx + jz .L002loop_x86 + movl 4(%edx),%edx movq (%esi),%mm0 + andl $16777216,%ecx movq 8(%esi),%mm1 + andl $512,%edx movq 16(%esi),%mm2 + orl %edx,%ecx movq 24(%esi),%mm3 movq 32(%esi),%mm4 movq 40(%esi),%mm5 movq 48(%esi),%mm6 movq 56(%esi),%mm7 + cmpl $16777728,%ecx + je .L003SSSE3 subl $80,%esp + jmp .L004loop_sse2 .align 16 -.L003loop_sse2: +.L004loop_sse2: movq %mm1,8(%esp) movq %mm2,16(%esp) movq %mm3,24(%esp) movq %mm5,40(%esp) movq %mm6,48(%esp) + pxor %mm1,%mm2 movq %mm7,56(%esp) - movl (%edi),%ecx - movl 4(%edi),%edx + movq %mm0,%mm3 + movl (%edi),%eax + movl 4(%edi),%ebx addl $8,%edi - bswap %ecx - bswap %edx - movl %ecx,76(%esp) - movl %edx,72(%esp) + movl $15,%edx + bswap %eax + bswap %ebx + jmp .L00500_14_sse2 .align 16 -.L00400_14_sse2: +.L00500_14_sse2: + movd %eax,%mm1 movl (%edi),%eax + movd %ebx,%mm7 movl 4(%edi),%ebx addl $8,%edi bswap %eax bswap %ebx - movl %eax,68(%esp) - movl %ebx,64(%esp) + punpckldq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 movq 40(%esp),%mm5 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 movq 48(%esp),%mm6 - movq 56(%esp),%mm7 + decl %edx + jnz .L00500_14_sse2 + movd %eax,%mm1 + movd %ebx,%mm7 + punpckldq %mm1,%mm7 movq %mm4,%mm1 - movq %mm4,%mm2 + pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) - psllq $23,%mm2 + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm3,%mm0 + movq %mm7,72(%esp) movq %mm1,%mm3 psrlq $4,%mm1 - pxor %mm2,%mm3 - psllq $23,%mm2 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 psrlq $23,%mm1 - pxor %mm2,%mm3 - psllq $4,%mm2 + paddq 56(%esp),%mm7 pxor %mm1,%mm3 + psllq $4,%mm4 paddq (%ebp),%mm7 - pxor %mm2,%mm3 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm2,%mm3 + movq %mm0,%mm2 + addl $8,%ebp + paddq %mm6,%mm3 + pxor %mm0,%mm0 + movl $32,%edx + jmp .L00616_79_sse2 +.align 16 +.L00616_79_sse2: + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm0 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm2 + addl $8,%ebp + movq 88(%esp),%mm5 + movq %mm7,%mm1 + psrlq $1,%mm7 + movq %mm5,%mm6 + psrlq $6,%mm5 + psllq $56,%mm1 + paddq %mm3,%mm2 + movq %mm7,%mm3 + psrlq $6,%mm7 + pxor %mm1,%mm3 + psllq $7,%mm1 + pxor %mm7,%mm3 + psrlq $1,%mm7 + pxor %mm1,%mm3 + movq %mm5,%mm1 + psrlq $13,%mm5 + pxor %mm3,%mm7 + psllq $3,%mm6 + pxor %mm5,%mm1 + paddq 200(%esp),%mm7 + pxor %mm6,%mm1 + psrlq $42,%mm5 + paddq 128(%esp),%mm7 + pxor %mm5,%mm1 + psllq $42,%mm6 + movq 40(%esp),%mm5 + pxor %mm6,%mm1 + movq 48(%esp),%mm6 + paddq %mm1,%mm7 + movq %mm4,%mm1 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) pand %mm4,%mm5 - movq 16(%esp),%mm2 + psllq $23,%mm4 + movq %mm7,72(%esp) + movq %mm1,%mm3 + psrlq $4,%mm1 pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + paddq (%ebp),%mm7 + pxor %mm4,%mm3 movq 24(%esp),%mm4 - paddq %mm5,%mm3 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + subl $8,%esp + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 192(%esp),%mm7 + paddq %mm6,%mm0 + addl $8,%ebp + decl %edx + jnz .L00616_79_sse2 + paddq %mm3,%mm0 + movq 8(%esp),%mm1 + movq 24(%esp),%mm3 + movq 40(%esp),%mm5 + movq 48(%esp),%mm6 + movq 56(%esp),%mm7 + pxor %mm1,%mm2 + paddq (%esi),%mm0 + paddq 8(%esi),%mm1 + paddq 16(%esi),%mm2 + paddq 24(%esi),%mm3 + paddq 32(%esi),%mm4 + paddq 40(%esi),%mm5 + paddq 48(%esi),%mm6 + paddq 56(%esi),%mm7 + movl $640,%eax + movq %mm0,(%esi) + movq %mm1,8(%esi) + movq %mm2,16(%esi) + movq %mm3,24(%esi) + movq %mm4,32(%esi) + movq %mm5,40(%esi) + movq %mm6,48(%esi) + movq %mm7,56(%esi) + leal (%esp,%eax,1),%esp + subl %eax,%ebp + cmpl 88(%esp),%edi + jb .L004loop_sse2 + movl 92(%esp),%esp + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003SSSE3: + leal -64(%esp),%edx + subl $256,%esp + movdqa 640(%ebp),%xmm1 + movdqu (%edi),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%edi),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%edi),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%edi),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%edi),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%edi),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%edi),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%edi),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movdqa %xmm2,-16(%edx) + nop +.align 32 +.L007loop_ssse3: + movdqa 16(%edx),%xmm2 + movdqa %xmm3,48(%edx) + leal 128(%ebp),%ebp + movq %mm1,8(%esp) + movl %edi,%ebx + movq %mm2,16(%esp) + leal 128(%edi),%edi + movq %mm3,24(%esp) + cmpl %eax,%edi + movq %mm5,40(%esp) + cmovbl %edi,%ebx + movq %mm6,48(%esp) + movl $4,%ecx + pxor %mm1,%mm2 + movq %mm7,56(%esp) + pxor %mm3,%mm3 + jmp .L00800_47_ssse3 +.align 32 +.L00800_47_ssse3: + movdqa %xmm5,%xmm3 + movdqa %xmm2,%xmm1 +.byte 102,15,58,15,208,8 + movdqa %xmm4,(%edx) +.byte 102,15,58,15,220,8 + movdqa %xmm2,%xmm4 + psrlq $7,%xmm2 + paddq %xmm3,%xmm0 + movdqa %xmm4,%xmm3 + psrlq $1,%xmm4 + psllq $56,%xmm3 + pxor %xmm4,%xmm2 + psrlq $7,%xmm4 + pxor %xmm3,%xmm2 + psllq $7,%xmm3 + pxor %xmm4,%xmm2 + movdqa %xmm7,%xmm4 + pxor %xmm3,%xmm2 + movdqa %xmm7,%xmm3 + psrlq $6,%xmm4 + paddq %xmm2,%xmm0 + movdqa %xmm7,%xmm2 + psrlq $19,%xmm3 + psllq $3,%xmm2 + pxor %xmm3,%xmm4 + psrlq $42,%xmm3 + pxor %xmm2,%xmm4 + psllq $42,%xmm2 + pxor %xmm3,%xmm4 + movdqa 32(%edx),%xmm3 + pxor %xmm2,%xmm4 + movdqa (%ebp),%xmm2 + movq %mm4,%mm1 + paddq %xmm4,%xmm0 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + paddq %xmm0,%xmm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 movq %mm0,%mm6 - paddq 72(%esp),%mm3 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm2,-128(%edx) + movdqa %xmm6,%xmm4 + movdqa %xmm3,%xmm2 +.byte 102,15,58,15,217,8 + movdqa %xmm5,16(%edx) +.byte 102,15,58,15,229,8 + movdqa %xmm3,%xmm5 + psrlq $7,%xmm3 + paddq %xmm4,%xmm1 + movdqa %xmm5,%xmm4 + psrlq $1,%xmm5 + psllq $56,%xmm4 + pxor %xmm5,%xmm3 + psrlq $7,%xmm5 + pxor %xmm4,%xmm3 + psllq $7,%xmm4 + pxor %xmm5,%xmm3 + movdqa %xmm0,%xmm5 + pxor %xmm4,%xmm3 + movdqa %xmm0,%xmm4 + psrlq $6,%xmm5 + paddq %xmm3,%xmm1 + movdqa %xmm0,%xmm3 + psrlq $19,%xmm4 + psllq $3,%xmm3 + pxor %xmm4,%xmm5 + psrlq $42,%xmm4 + pxor %xmm3,%xmm5 + psllq $42,%xmm3 + pxor %xmm4,%xmm5 + movdqa 48(%edx),%xmm4 + pxor %xmm3,%xmm5 + movdqa 16(%ebp),%xmm3 + movq %mm4,%mm1 + paddq %xmm5,%xmm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm1,%xmm3 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 + pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 + pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 - subl $8,%esp + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm3,-112(%edx) + movdqa %xmm7,%xmm5 + movdqa %xmm4,%xmm3 +.byte 102,15,58,15,226,8 + movdqa %xmm6,32(%edx) +.byte 102,15,58,15,238,8 + movdqa %xmm4,%xmm6 + psrlq $7,%xmm4 + paddq %xmm5,%xmm2 + movdqa %xmm6,%xmm5 + psrlq $1,%xmm6 + psllq $56,%xmm5 + pxor %xmm6,%xmm4 + psrlq $7,%xmm6 + pxor %xmm5,%xmm4 + psllq $7,%xmm5 + pxor %xmm6,%xmm4 + movdqa %xmm1,%xmm6 + pxor %xmm5,%xmm4 + movdqa %xmm1,%xmm5 + psrlq $6,%xmm6 + paddq %xmm4,%xmm2 + movdqa %xmm1,%xmm4 + psrlq $19,%xmm5 + psllq $3,%xmm4 + pxor %xmm5,%xmm6 + psrlq $42,%xmm5 + pxor %xmm4,%xmm6 + psllq $42,%xmm4 + pxor %xmm5,%xmm6 + movdqa (%edx),%xmm5 + pxor %xmm4,%xmm6 + movdqa 32(%ebp),%xmm4 + movq %mm4,%mm1 + paddq %xmm6,%xmm2 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm2,%xmm4 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm4,-96(%edx) + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm4 +.byte 102,15,58,15,235,8 + movdqa %xmm7,48(%edx) +.byte 102,15,58,15,247,8 + movdqa %xmm5,%xmm7 + psrlq $7,%xmm5 + paddq %xmm6,%xmm3 + movdqa %xmm7,%xmm6 + psrlq $1,%xmm7 + psllq $56,%xmm6 + pxor %xmm7,%xmm5 + psrlq $7,%xmm7 + pxor %xmm6,%xmm5 + psllq $7,%xmm6 + pxor %xmm7,%xmm5 + movdqa %xmm2,%xmm7 + pxor %xmm6,%xmm5 + movdqa %xmm2,%xmm6 + psrlq $6,%xmm7 + paddq %xmm5,%xmm3 + movdqa %xmm2,%xmm5 + psrlq $19,%xmm6 + psllq $3,%xmm5 + pxor %xmm6,%xmm7 + psrlq $42,%xmm6 + pxor %xmm5,%xmm7 + psllq $42,%xmm5 + pxor %xmm6,%xmm7 + movdqa 16(%edx),%xmm6 + pxor %xmm5,%xmm7 + movdqa 48(%ebp),%xmm5 + movq %mm4,%mm1 + paddq %xmm7,%xmm3 + movq -80(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm3,%xmm5 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 movq %mm0,%mm5 - por %mm2,%mm0 - pand %mm2,%mm5 - pand %mm1,%mm0 - por %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) paddq %mm5,%mm7 - movq %mm3,%mm0 - movb (%ebp),%dl - paddq %mm7,%mm0 - addl $8,%ebp - cmpb $53,%dl - jne .L00400_14_sse2 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 movq 40(%esp),%mm5 + paddq %mm6,%mm0 movq 48(%esp),%mm6 - movq 56(%esp),%mm7 + movdqa %xmm5,-80(%edx) + movdqa %xmm1,%xmm7 + movdqa %xmm6,%xmm5 +.byte 102,15,58,15,244,8 + movdqa %xmm0,(%edx) +.byte 102,15,58,15,248,8 + movdqa %xmm6,%xmm0 + psrlq $7,%xmm6 + paddq %xmm7,%xmm4 + movdqa %xmm0,%xmm7 + psrlq $1,%xmm0 + psllq $56,%xmm7 + pxor %xmm0,%xmm6 + psrlq $7,%xmm0 + pxor %xmm7,%xmm6 + psllq $7,%xmm7 + pxor %xmm0,%xmm6 + movdqa %xmm3,%xmm0 + pxor %xmm7,%xmm6 + movdqa %xmm3,%xmm7 + psrlq $6,%xmm0 + paddq %xmm6,%xmm4 + movdqa %xmm3,%xmm6 + psrlq $19,%xmm7 + psllq $3,%xmm6 + pxor %xmm7,%xmm0 + psrlq $42,%xmm7 + pxor %xmm6,%xmm0 + psllq $42,%xmm6 + pxor %xmm7,%xmm0 + movdqa 32(%edx),%xmm7 + pxor %xmm6,%xmm0 + movdqa 64(%ebp),%xmm6 movq %mm4,%mm1 - movq %mm4,%mm2 + paddq %xmm0,%xmm4 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) - psllq $23,%mm2 + paddq %xmm4,%xmm6 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 - pxor %mm2,%mm3 - psllq $23,%mm2 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 psrlq $23,%mm1 - pxor %mm2,%mm3 - psllq $4,%mm2 + paddq 56(%esp),%mm7 pxor %mm1,%mm3 - paddq (%ebp),%mm7 - pxor %mm2,%mm3 - pxor %mm6,%mm5 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) pand %mm4,%mm5 - movq 16(%esp),%mm2 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 pxor %mm6,%mm5 - movq 24(%esp),%mm4 - paddq %mm5,%mm3 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm6,-64(%edx) + movdqa %xmm2,%xmm0 + movdqa %xmm7,%xmm6 +.byte 102,15,58,15,253,8 + movdqa %xmm1,16(%edx) +.byte 102,15,58,15,193,8 + movdqa %xmm7,%xmm1 + psrlq $7,%xmm7 + paddq %xmm0,%xmm5 + movdqa %xmm1,%xmm0 + psrlq $1,%xmm1 + psllq $56,%xmm0 + pxor %xmm1,%xmm7 + psrlq $7,%xmm1 + pxor %xmm0,%xmm7 + psllq $7,%xmm0 + pxor %xmm1,%xmm7 + movdqa %xmm4,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm4,%xmm0 + psrlq $6,%xmm1 + paddq %xmm7,%xmm5 + movdqa %xmm4,%xmm7 + psrlq $19,%xmm0 + psllq $3,%xmm7 + pxor %xmm0,%xmm1 + psrlq $42,%xmm0 + pxor %xmm7,%xmm1 + psllq $42,%xmm7 + pxor %xmm0,%xmm1 + movdqa 48(%edx),%xmm0 + pxor %xmm7,%xmm1 + movdqa 80(%ebp),%xmm7 + movq %mm4,%mm1 + paddq %xmm1,%xmm5 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + paddq %xmm5,%xmm7 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm7,-48(%edx) + movdqa %xmm3,%xmm1 + movdqa %xmm0,%xmm7 +.byte 102,15,58,15,198,8 + movdqa %xmm2,32(%edx) +.byte 102,15,58,15,202,8 + movdqa %xmm0,%xmm2 + psrlq $7,%xmm0 + paddq %xmm1,%xmm6 + movdqa %xmm2,%xmm1 + psrlq $1,%xmm2 + psllq $56,%xmm1 + pxor %xmm2,%xmm0 + psrlq $7,%xmm2 + pxor %xmm1,%xmm0 + psllq $7,%xmm1 + pxor %xmm2,%xmm0 + movdqa %xmm5,%xmm2 + pxor %xmm1,%xmm0 + movdqa %xmm5,%xmm1 + psrlq $6,%xmm2 + paddq %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + psrlq $19,%xmm1 + psllq $3,%xmm0 + pxor %xmm1,%xmm2 + psrlq $42,%xmm1 + pxor %xmm0,%xmm2 + psllq $42,%xmm0 + pxor %xmm1,%xmm2 + movdqa (%edx),%xmm1 + pxor %xmm0,%xmm2 + movdqa 96(%ebp),%xmm0 + movq %mm4,%mm1 + paddq %xmm2,%xmm6 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + paddq %xmm6,%xmm0 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm0,-32(%edx) + movdqa %xmm4,%xmm2 + movdqa %xmm1,%xmm0 +.byte 102,15,58,15,207,8 + movdqa %xmm3,48(%edx) +.byte 102,15,58,15,211,8 + movdqa %xmm1,%xmm3 + psrlq $7,%xmm1 + paddq %xmm2,%xmm7 + movdqa %xmm3,%xmm2 + psrlq $1,%xmm3 + psllq $56,%xmm2 + pxor %xmm3,%xmm1 + psrlq $7,%xmm3 + pxor %xmm2,%xmm1 + psllq $7,%xmm2 + pxor %xmm3,%xmm1 + movdqa %xmm6,%xmm3 + pxor %xmm2,%xmm1 + movdqa %xmm6,%xmm2 + psrlq $6,%xmm3 + paddq %xmm1,%xmm7 + movdqa %xmm6,%xmm1 + psrlq $19,%xmm2 + psllq $3,%xmm1 + pxor %xmm2,%xmm3 + psrlq $42,%xmm2 + pxor %xmm1,%xmm3 + psllq $42,%xmm1 + pxor %xmm2,%xmm3 + movdqa 16(%edx),%xmm2 + pxor %xmm1,%xmm3 + movdqa 112(%ebp),%xmm1 + movq %mm4,%mm1 + paddq %xmm3,%xmm7 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + paddq %xmm7,%xmm1 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 40(%esp),%mm5 + paddq %mm6,%mm0 + movq 48(%esp),%mm6 + movdqa %xmm1,-16(%edx) + leal 128(%ebp),%ebp + decl %ecx + jnz .L00800_47_ssse3 + movdqa (%ebp),%xmm1 + leal -640(%ebp),%ebp + movdqu (%ebx),%xmm0 +.byte 102,15,56,0,193 + movdqa (%ebp),%xmm3 + movdqa %xmm1,%xmm2 + movdqu 16(%ebx),%xmm1 + paddq %xmm0,%xmm3 +.byte 102,15,56,0,202 + movq %mm4,%mm1 + movq -128(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,32(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 56(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 movq %mm0,%mm6 - paddq 72(%esp),%mm3 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -120(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm3,-128(%edx) + movdqa 16(%ebp),%xmm4 + movdqa %xmm2,%xmm3 + movdqu 32(%ebx),%xmm2 + paddq %xmm1,%xmm4 +.byte 102,15,56,0,211 + movq %mm4,%mm1 + movq -112(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 + pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 + pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 - subl $8,%esp + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -104(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm4,-112(%edx) + movdqa 32(%ebp),%xmm5 + movdqa %xmm3,%xmm4 + movdqu 48(%ebx),%xmm3 + paddq %xmm2,%xmm5 +.byte 102,15,56,0,220 + movq %mm4,%mm1 + movq -96(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 movq %mm0,%mm5 - por %mm2,%mm0 - movq 88(%esp),%mm6 - pand %mm2,%mm5 - pand %mm1,%mm0 - movq 192(%esp),%mm2 - por %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -88(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) paddq %mm5,%mm7 - movq %mm3,%mm0 - movb (%ebp),%dl - paddq %mm7,%mm0 - addl $8,%ebp -.align 16 -.L00516_79_sse2: - movq %mm2,%mm1 - psrlq $1,%mm2 - movq %mm6,%mm7 - psrlq $6,%mm6 - movq %mm2,%mm3 - psrlq $6,%mm2 - movq %mm6,%mm5 - psrlq $13,%mm6 - pxor %mm2,%mm3 - psrlq $1,%mm2 - pxor %mm6,%mm5 - psrlq $42,%mm6 - pxor %mm2,%mm3 - movq 200(%esp),%mm2 - psllq $56,%mm1 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm5,-96(%edx) + movdqa 48(%ebp),%xmm6 + movdqa %xmm4,%xmm5 + movdqu 64(%ebx),%xmm4 + paddq %xmm3,%xmm6 +.byte 102,15,56,0,229 + movq %mm4,%mm1 + movq -80(%edx),%mm7 pxor %mm6,%mm5 - psllq $3,%mm7 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 pxor %mm1,%mm3 - paddq 128(%esp),%mm2 - psllq $7,%mm1 - pxor %mm7,%mm5 - psllq $42,%mm7 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 pxor %mm1,%mm3 - pxor %mm7,%mm5 - paddq %mm5,%mm3 - paddq %mm2,%mm3 - movq %mm3,72(%esp) + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -72(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 movq 40(%esp),%mm5 + paddq %mm6,%mm0 movq 48(%esp),%mm6 - movq 56(%esp),%mm7 + movdqa %xmm6,-80(%edx) + movdqa 64(%ebp),%xmm7 + movdqa %xmm5,%xmm6 + movdqu 80(%ebx),%xmm5 + paddq %xmm4,%xmm7 +.byte 102,15,56,0,238 movq %mm4,%mm1 - movq %mm4,%mm2 + movq -64(%edx),%mm7 + pxor %mm6,%mm5 psrlq $14,%mm1 movq %mm4,32(%esp) - psllq $23,%mm2 + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 movq %mm1,%mm3 psrlq $4,%mm1 - pxor %mm2,%mm3 - psllq $23,%mm2 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 pxor %mm1,%mm3 + movq %mm0,(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 psrlq $23,%mm1 - pxor %mm2,%mm3 - psllq $4,%mm2 + paddq 56(%esp),%mm7 pxor %mm1,%mm3 - paddq (%ebp),%mm7 - pxor %mm2,%mm3 - pxor %mm6,%mm5 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 24(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 movq 8(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 32(%esp),%mm5 + paddq %mm6,%mm2 + movq 40(%esp),%mm6 + movq %mm4,%mm1 + movq -56(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,24(%esp) pand %mm4,%mm5 - movq 16(%esp),%mm2 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 pxor %mm6,%mm5 - movq 24(%esp),%mm4 - paddq %mm5,%mm3 - movq %mm0,(%esp) + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,56(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 48(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 16(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq (%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 24(%esp),%mm5 + paddq %mm6,%mm0 + movq 32(%esp),%mm6 + movdqa %xmm7,-64(%edx) + movdqa %xmm0,(%edx) + movdqa 80(%ebp),%xmm0 + movdqa %xmm6,%xmm7 + movdqu 96(%ebx),%xmm6 + paddq %xmm5,%xmm0 +.byte 102,15,56,0,247 + movq %mm4,%mm1 + movq -48(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,16(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,48(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 40(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 8(%esp),%mm4 paddq %mm7,%mm3 movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 movq %mm0,%mm6 - paddq 72(%esp),%mm3 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 56(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 16(%esp),%mm5 + paddq %mm6,%mm2 + movq 24(%esp),%mm6 + movq %mm4,%mm1 + movq -40(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,8(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,40(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 32(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq (%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 psrlq $28,%mm5 paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 psllq $25,%mm6 + movq 48(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 8(%esp),%mm5 + paddq %mm6,%mm0 + movq 16(%esp),%mm6 + movdqa %xmm0,-48(%edx) + movdqa %xmm1,16(%edx) + movdqa 96(%ebp),%xmm1 + movdqa %xmm7,%xmm0 + movdqu 112(%ebx),%xmm7 + paddq %xmm6,%xmm1 +.byte 102,15,56,0,248 + movq %mm4,%mm1 + movq -32(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,32(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 24(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 56(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 movq %mm5,%mm7 + psllq $25,%mm6 + movq 40(%esp),%mm1 psrlq $6,%mm5 pxor %mm6,%mm7 psllq $5,%mm6 pxor %mm5,%mm7 + pxor %mm1,%mm0 psrlq $5,%mm5 pxor %mm6,%mm7 + pand %mm0,%mm2 psllq $6,%mm6 pxor %mm5,%mm7 - subl $8,%esp + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq (%esp),%mm5 + paddq %mm6,%mm2 + movq 8(%esp),%mm6 + movq %mm4,%mm1 + movq -24(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,56(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,24(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 16(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 48(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 32(%esp),%mm1 + psrlq $6,%mm5 pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 + movq 56(%esp),%mm5 + paddq %mm6,%mm0 + movq (%esp),%mm6 + movdqa %xmm1,-32(%edx) + movdqa %xmm2,32(%edx) + movdqa 112(%ebp),%xmm2 + movdqa (%edx),%xmm0 + paddq %xmm7,%xmm2 + movq %mm4,%mm1 + movq -16(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,48(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm0 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm0,16(%esp) + paddq %mm5,%mm7 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq 8(%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 40(%esp),%mm4 + paddq %mm7,%mm3 movq %mm0,%mm5 - por %mm2,%mm0 - movq 88(%esp),%mm6 - pand %mm2,%mm5 - pand %mm1,%mm0 - movq 192(%esp),%mm2 - por %mm0,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm0,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 24(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm0,%mm2 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + pxor %mm7,%mm6 + movq 48(%esp),%mm5 + paddq %mm6,%mm2 + movq 56(%esp),%mm6 + movq %mm4,%mm1 + movq -8(%edx),%mm7 + pxor %mm6,%mm5 + psrlq $14,%mm1 + movq %mm4,40(%esp) + pand %mm4,%mm5 + psllq $23,%mm4 + paddq %mm3,%mm2 + movq %mm1,%mm3 + psrlq $4,%mm1 + pxor %mm6,%mm5 + pxor %mm4,%mm3 + psllq $23,%mm4 + pxor %mm1,%mm3 + movq %mm2,8(%esp) paddq %mm5,%mm7 - movq %mm3,%mm0 - movb (%ebp),%dl - paddq %mm7,%mm0 - addl $8,%ebp - cmpb $23,%dl - jne .L00516_79_sse2 - movq 8(%esp),%mm1 - movq 16(%esp),%mm2 - movq 24(%esp),%mm3 + pxor %mm4,%mm3 + psrlq $23,%mm1 + paddq (%esp),%mm7 + pxor %mm1,%mm3 + psllq $4,%mm4 + pxor %mm4,%mm3 + movq 32(%esp),%mm4 + paddq %mm7,%mm3 + movq %mm2,%mm5 + psrlq $28,%mm5 + paddq %mm3,%mm4 + movq %mm2,%mm6 + movq %mm5,%mm7 + psllq $25,%mm6 + movq 16(%esp),%mm1 + psrlq $6,%mm5 + pxor %mm6,%mm7 + psllq $5,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm2 + psrlq $5,%mm5 + pxor %mm6,%mm7 + pand %mm2,%mm0 + psllq $6,%mm6 + pxor %mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm7,%mm6 movq 40(%esp),%mm5 + paddq %mm6,%mm0 movq 48(%esp),%mm6 + movdqa %xmm2,-16(%edx) + movq 8(%esp),%mm1 + paddq %mm3,%mm0 + movq 24(%esp),%mm3 movq 56(%esp),%mm7 + pxor %mm1,%mm2 paddq (%esi),%mm0 paddq 8(%esi),%mm1 paddq 16(%esi),%mm2 @@ -286,12 +2280,10 @@ sha512_block_data_order: movq %mm5,40(%esi) movq %mm6,48(%esi) movq %mm7,56(%esi) - addl $640,%esp - subl $640,%ebp - cmpl 88(%esp),%edi - jb .L003loop_sse2 + cmpl %eax,%edi + jb .L007loop_ssse3 + movl 76(%edx),%esp emms - movl 92(%esp),%esp popl %edi popl %esi popl %ebx @@ -402,7 +2394,7 @@ sha512_block_data_order: movl $16,%ecx .long 2784229001 .align 16 -.L00600_15_x86: +.L00900_15_x86: movl 40(%esp),%ecx movl 44(%esp),%edx movl %ecx,%esi @@ -509,9 +2501,9 @@ sha512_block_data_order: subl $8,%esp leal 8(%ebp),%ebp cmpb $148,%dl - jne .L00600_15_x86 + jne .L00900_15_x86 .align 16 -.L00716_79_x86: +.L01016_79_x86: movl 312(%esp),%ecx movl 316(%esp),%edx movl %ecx,%esi @@ -684,7 +2676,7 @@ sha512_block_data_order: subl $8,%esp leal 8(%ebp),%ebp cmpb $23,%dl - jne .L00716_79_x86 + jne .L01016_79_x86 movl 840(%esp),%esi movl 844(%esp),%edi movl (%esi),%eax @@ -827,10 +2819,12 @@ sha512_block_data_order: .long 4234509866,1501505948 .long 987167468,1607167915 .long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 .size sha512_block_data_order,.-.L_sha512_block_data_order_begin .byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 .byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/vpaes-x86.s b/secure/lib/libcrypto/i386/vpaes-x86.s index 7264297..e1dda0f 100644 --- a/secure/lib/libcrypto/i386/vpaes-x86.s +++ b/secure/lib/libcrypto/i386/vpaes-x86.s @@ -74,33 +74,33 @@ _vpaes_encrypt_core: movdqa %xmm6,%xmm1 movdqa (%ebp),%xmm2 pandn %xmm0,%xmm1 - movdqu (%edx),%xmm5 - psrld $4,%xmm1 pand %xmm6,%xmm0 + movdqu (%edx),%xmm5 .byte 102,15,56,0,208 movdqa 16(%ebp),%xmm0 -.byte 102,15,56,0,193 pxor %xmm5,%xmm2 - pxor %xmm2,%xmm0 + psrld $4,%xmm1 addl $16,%edx +.byte 102,15,56,0,193 leal 192(%ebp),%ebx + pxor %xmm2,%xmm0 jmp .L000enc_entry .align 16 .L001enc_loop: movdqa 32(%ebp),%xmm4 -.byte 102,15,56,0,226 - pxor %xmm5,%xmm4 movdqa 48(%ebp),%xmm0 +.byte 102,15,56,0,226 .byte 102,15,56,0,195 - pxor %xmm4,%xmm0 + pxor %xmm5,%xmm4 movdqa 64(%ebp),%xmm5 -.byte 102,15,56,0,234 + pxor %xmm4,%xmm0 movdqa -64(%ebx,%ecx,1),%xmm1 +.byte 102,15,56,0,234 movdqa 80(%ebp),%xmm2 -.byte 102,15,56,0,211 - pxor %xmm5,%xmm2 movdqa (%ebx,%ecx,1),%xmm4 +.byte 102,15,56,0,211 movdqa %xmm0,%xmm3 + pxor %xmm5,%xmm2 .byte 102,15,56,0,193 addl $16,%edx pxor %xmm2,%xmm0 @@ -109,28 +109,28 @@ _vpaes_encrypt_core: pxor %xmm0,%xmm3 .byte 102,15,56,0,193 andl $48,%ecx - pxor %xmm3,%xmm0 subl $1,%eax + pxor %xmm3,%xmm0 .L000enc_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm5 pandn %xmm0,%xmm1 psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm5 .byte 102,15,56,0,232 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm5,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm5,%xmm3 .byte 102,15,56,0,224 - pxor %xmm5,%xmm4 movdqa %xmm7,%xmm2 + pxor %xmm5,%xmm4 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 - movdqu (%edx),%xmm5 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 + movdqu (%edx),%xmm5 pxor %xmm1,%xmm3 jnz .L001enc_loop movdqa 96(%ebp),%xmm4 @@ -146,8 +146,8 @@ _vpaes_encrypt_core: .type _vpaes_decrypt_core,@function .align 16 _vpaes_decrypt_core: - movl 240(%edx),%eax leal 608(%ebp),%ebx + movl 240(%edx),%eax movdqa %xmm6,%xmm1 movdqa -64(%ebx),%xmm2 pandn %xmm0,%xmm1 @@ -170,56 +170,56 @@ _vpaes_decrypt_core: .align 16 .L003dec_loop: movdqa -32(%ebx),%xmm4 + movdqa -16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa -16(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 - addl $16,%edx -.byte 102,15,56,0,197 movdqa (%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 16(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 16(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 - subl $1,%eax .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 32(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 48(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 48(%ebx),%xmm0 -.byte 102,15,56,0,195 - pxor %xmm4,%xmm0 .byte 102,15,56,0,197 +.byte 102,15,56,0,203 + pxor %xmm4,%xmm0 movdqa 64(%ebx),%xmm4 + pxor %xmm1,%xmm0 + movdqa 80(%ebx),%xmm1 .byte 102,15,56,0,226 - pxor %xmm0,%xmm4 - movdqa 80(%ebx),%xmm0 -.byte 102,15,56,0,195 +.byte 102,15,56,0,197 +.byte 102,15,56,0,203 pxor %xmm4,%xmm0 + addl $16,%edx .byte 102,15,58,15,237,12 + pxor %xmm1,%xmm0 + subl $1,%eax .L002dec_entry: movdqa %xmm6,%xmm1 + movdqa -32(%ebp),%xmm2 pandn %xmm0,%xmm1 - psrld $4,%xmm1 pand %xmm6,%xmm0 - movdqa -32(%ebp),%xmm2 + psrld $4,%xmm1 .byte 102,15,56,0,208 - pxor %xmm1,%xmm0 movdqa %xmm7,%xmm3 + pxor %xmm1,%xmm0 .byte 102,15,56,0,217 - pxor %xmm2,%xmm3 movdqa %xmm7,%xmm4 + pxor %xmm2,%xmm3 .byte 102,15,56,0,224 pxor %xmm2,%xmm4 movdqa %xmm7,%xmm2 .byte 102,15,56,0,211 - pxor %xmm0,%xmm2 movdqa %xmm7,%xmm3 + pxor %xmm0,%xmm2 .byte 102,15,56,0,220 - pxor %xmm1,%xmm3 movdqu (%edx),%xmm0 + pxor %xmm1,%xmm3 jnz .L003dec_loop movdqa 96(%ebx),%xmm4 .byte 102,15,56,0,226 @@ -328,12 +328,12 @@ _vpaes_schedule_core: .type _vpaes_schedule_192_smear,@function .align 16 _vpaes_schedule_192_smear: - pshufd $128,%xmm6,%xmm0 - pxor %xmm0,%xmm6 + pshufd $128,%xmm6,%xmm1 pshufd $254,%xmm7,%xmm0 + pxor %xmm1,%xmm6 + pxor %xmm1,%xmm1 pxor %xmm0,%xmm6 movdqa %xmm6,%xmm0 - pxor %xmm1,%xmm1 movhlps %xmm1,%xmm6 ret .size _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear diff --git a/secure/lib/libcrypto/i386/wp-mmx.s b/secure/lib/libcrypto/i386/wp-mmx.s index c0a42fd..6ceae96 100644 --- a/secure/lib/libcrypto/i386/wp-mmx.s +++ b/secure/lib/libcrypto/i386/wp-mmx.s @@ -67,228 +67,230 @@ whirlpool_block_mmx: movq 4096(%ebp,%esi,8),%mm0 movl (%esp),%eax movl 4(%esp),%ebx - movb %al,%cl - movb %ah,%dl + movzbl %al,%ecx + movzbl %ah,%edx + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 movq 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl movl 8(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx movq 6(%ebp,%esi,8),%mm2 movq 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx movq 4(%ebp,%esi,8),%mm4 movq 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 12(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx movq 2(%ebp,%esi,8),%mm6 movq 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 16(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 20(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 24(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 28(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 32(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl movl 36(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 40(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 44(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 48(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 52(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 56(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 60(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl movl 64(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl movl 68(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 movq %mm0,(%esp) @@ -299,226 +301,226 @@ whirlpool_block_mmx: movq %mm5,40(%esp) movq %mm6,48(%esp) movq %mm7,56(%esp) - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm0 pxor 7(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl movl 72(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm2 pxor 5(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm4 pxor 3(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl movl 76(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm6 pxor 1(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm1 pxor 7(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl movl 80(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm3 pxor 5(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm5 pxor 3(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl movl 84(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm7 pxor 1(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm2 pxor 7(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl movl 88(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm4 pxor 5(%ebp,%edi,8),%mm5 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm6 pxor 3(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl movl 92(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm0 pxor 1(%ebp,%edi,8),%mm1 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm3 pxor 7(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl movl 96(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm5 pxor 5(%ebp,%edi,8),%mm6 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm7 pxor 3(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl movl 100(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm1 pxor 1(%ebp,%edi,8),%mm2 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm4 pxor 7(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl movl 104(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm6 pxor 5(%ebp,%edi,8),%mm7 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm0 pxor 3(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl movl 108(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm2 pxor 1(%ebp,%edi,8),%mm3 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm5 pxor 7(%ebp,%edi,8),%mm6 - movb %al,%cl - movb %ah,%dl movl 112(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm7 pxor 5(%ebp,%edi,8),%mm0 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm1 pxor 3(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl movl 116(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm3 pxor 1(%ebp,%edi,8),%mm4 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm6 pxor 7(%ebp,%edi,8),%mm7 - movb %al,%cl - movb %ah,%dl movl 120(%esp),%eax leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm0 pxor 5(%ebp,%edi,8),%mm1 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm2 pxor 3(%ebp,%edi,8),%mm3 - movb %bl,%cl - movb %bh,%dl movl 124(%esp),%ebx leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm4 pxor 1(%ebp,%edi,8),%mm5 - movb %al,%cl - movb %ah,%dl + shrl $16,%eax leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi - shrl $16,%eax + movzbl %ah,%edx pxor (%ebp,%esi,8),%mm7 pxor 7(%ebp,%edi,8),%mm0 - movb %al,%cl - movb %ah,%dl leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi + movzbl %bh,%edx pxor 6(%ebp,%esi,8),%mm1 pxor 5(%ebp,%edi,8),%mm2 - movb %bl,%cl - movb %bh,%dl + shrl $16,%ebx leal (%ecx,%ecx,1),%esi + movzbl %bl,%ecx leal (%edx,%edx,1),%edi - shrl $16,%ebx + movzbl %bh,%edx pxor 4(%ebp,%esi,8),%mm3 pxor 3(%ebp,%edi,8),%mm4 - movb %bl,%cl - movb %bh,%dl leal (%ecx,%ecx,1),%esi + movzbl %al,%ecx leal (%edx,%edx,1),%edi + movzbl %ah,%edx pxor 2(%ebp,%esi,8),%mm5 pxor 1(%ebp,%edi,8),%mm6 leal 128(%esp),%ebx diff --git a/secure/lib/libcrypto/i386/x86-gf2m.s b/secure/lib/libcrypto/i386/x86-gf2m.s index 1fcfe38..b7177ed 100644 --- a/secure/lib/libcrypto/i386/x86-gf2m.s +++ b/secure/lib/libcrypto/i386/x86-gf2m.s @@ -341,4 +341,4 @@ bn_GF2m_mul_2x2: .byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 .byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 .byte 62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/x86-mont.s b/secure/lib/libcrypto/i386/x86-mont.s index 0eec75d..26f84f8 100644 --- a/secure/lib/libcrypto/i386/x86-mont.s +++ b/secure/lib/libcrypto/i386/x86-mont.s @@ -454,4 +454,4 @@ bn_mul_mont: .byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 .byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 .byte 111,114,103,62,0 -.comm OPENSSL_ia32cap_P,8,4 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/secure/lib/libcrypto/i386/x86cpuid.s b/secure/lib/libcrypto/i386/x86cpuid.s index 815ff69..69622ec 100644 --- a/secure/lib/libcrypto/i386/x86cpuid.s +++ b/secure/lib/libcrypto/i386/x86cpuid.s @@ -23,6 +23,8 @@ OPENSSL_ia32_cpuid: xorl %eax,%eax btl $21,%ecx jnc .L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) .byte 0x0f,0xa2 movl %eax,%edi xorl %eax,%eax @@ -73,28 +75,36 @@ OPENSSL_ia32_cpuid: andl $4026531839,%edx jmp .L002generic .L001intel: + cmpl $7,%edi + jb .L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +.L003cacheinfo: cmpl $4,%edi movl $-1,%edi - jb .L003nocacheinfo + jb .L004nocacheinfo movl $4,%eax movl $0,%ecx .byte 0x0f,0xa2 movl %eax,%edi shrl $14,%edi andl $4095,%edi -.L003nocacheinfo: +.L004nocacheinfo: movl $1,%eax xorl %ecx,%ecx .byte 0x0f,0xa2 andl $3220176895,%edx cmpl $0,%ebp - jne .L004notintel + jne .L005notintel orl $1073741824,%edx andb $15,%ah cmpb $15,%ah - jne .L004notintel + jne .L005notintel orl $1048576,%edx -.L004notintel: +.L005notintel: btl $28,%edx jnc .L002generic andl $4026531839,%edx @@ -111,20 +121,22 @@ OPENSSL_ia32_cpuid: movl %edx,%esi orl %ecx,%ebp btl $27,%ecx - jnc .L005clear_avx + jnc .L006clear_avx xorl %ecx,%ecx .byte 15,1,208 andl $6,%eax cmpl $6,%eax - je .L006done + je .L007done cmpl $2,%eax - je .L005clear_avx -.L007clear_xmm: + je .L006clear_avx +.L008clear_xmm: andl $4261412861,%ebp andl $4278190079,%esi -.L005clear_avx: +.L006clear_avx: andl $4026525695,%ebp -.L006done: + movl 20(%esp),%edi + andl $4294967263,8(%edi) +.L007done: movl %esi,%eax movl %ebp,%edx .L000nocpuid: @@ -143,9 +155,9 @@ OPENSSL_rdtsc: xorl %edx,%edx leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L008notsc + jnc .L009notsc .byte 0x0f,0x31 -.L008notsc: +.L009notsc: ret .size OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin .globl OPENSSL_instrument_halt @@ -155,14 +167,14 @@ OPENSSL_instrument_halt: .L_OPENSSL_instrument_halt_begin: leal OPENSSL_ia32cap_P,%ecx btl $4,(%ecx) - jnc .L009nohalt + jnc .L010nohalt .long 2421723150 andl $3,%eax - jnz .L009nohalt + jnz .L010nohalt pushfl popl %eax btl $9,%eax - jnc .L009nohalt + jnc .L010nohalt .byte 0x0f,0x31 pushl %edx pushl %eax @@ -172,7 +184,7 @@ OPENSSL_instrument_halt: sbbl 4(%esp),%edx addl $8,%esp ret -.L009nohalt: +.L010nohalt: xorl %eax,%eax xorl %edx,%edx ret @@ -185,21 +197,21 @@ OPENSSL_far_spin: pushfl popl %eax btl $9,%eax - jnc .L010nospin + jnc .L011nospin movl 4(%esp),%eax movl 8(%esp),%ecx .long 2430111262 xorl %eax,%eax movl (%ecx),%edx - jmp .L011spin + jmp .L012spin .align 16 -.L011spin: +.L012spin: incl %eax cmpl (%ecx),%edx - je .L011spin + je .L012spin .long 529567888 ret -.L010nospin: +.L011nospin: xorl %eax,%eax xorl %edx,%edx ret @@ -214,10 +226,10 @@ OPENSSL_wipe_cpu: leal OPENSSL_ia32cap_P,%ecx movl (%ecx),%ecx btl $1,(%ecx) - jnc .L012no_x87 + jnc .L013no_x87 andl $83886080,%ecx cmpl $83886080,%ecx - jne .L013no_sse2 + jne .L014no_sse2 pxor %xmm0,%xmm0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 @@ -226,9 +238,9 @@ OPENSSL_wipe_cpu: pxor %xmm5,%xmm5 pxor %xmm6,%xmm6 pxor %xmm7,%xmm7 -.L013no_sse2: +.L014no_sse2: .long 4007259865,4007259865,4007259865,4007259865,2430851995 -.L012no_x87: +.L013no_x87: leal 4(%esp),%eax ret .size OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin @@ -242,11 +254,11 @@ OPENSSL_atomic_add: pushl %ebx nop movl (%edx),%eax -.L014spin: +.L015spin: leal (%eax,%ecx,1),%ebx nop .long 447811568 - jne .L014spin + jne .L015spin movl %ebx,%eax popl %ebx ret @@ -287,32 +299,32 @@ OPENSSL_cleanse: movl 8(%esp),%ecx xorl %eax,%eax cmpl $7,%ecx - jae .L015lot + jae .L016lot cmpl $0,%ecx - je .L016ret -.L017little: + je .L017ret +.L018little: movb %al,(%edx) subl $1,%ecx leal 1(%edx),%edx - jnz .L017little -.L016ret: + jnz .L018little +.L017ret: ret .align 16 -.L015lot: +.L016lot: testl $3,%edx - jz .L018aligned + jz .L019aligned movb %al,(%edx) leal -1(%ecx),%ecx leal 1(%edx),%edx - jmp .L015lot -.L018aligned: + jmp .L016lot +.L019aligned: movl %eax,(%edx) leal -4(%ecx),%ecx testl $-4,%ecx leal 4(%edx),%edx - jnz .L018aligned + jnz .L019aligned cmpl $0,%ecx - jne .L017little + jne .L018little ret .size OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin .globl OPENSSL_ia32_rdrand @@ -321,15 +333,32 @@ OPENSSL_cleanse: OPENSSL_ia32_rdrand: .L_OPENSSL_ia32_rdrand_begin: movl $8,%ecx -.L019loop: +.L020loop: .byte 15,199,240 - jc .L020break - loop .L019loop -.L020break: + jc .L021break + loop .L020loop +.L021break: cmpl $0,%eax cmovel %ecx,%eax ret .size OPENSSL_ia32_rdrand,.-.L_OPENSSL_ia32_rdrand_begin -.comm OPENSSL_ia32cap_P,8,4 +.globl OPENSSL_ia32_rdseed +.type OPENSSL_ia32_rdseed,@function +.align 16 +OPENSSL_ia32_rdseed: +.L_OPENSSL_ia32_rdseed_begin: + movl $8,%ecx +.L022loop: +.byte 15,199,248 + jc .L023break + loop .L022loop +.L023break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.size OPENSSL_ia32_rdseed,.-.L_OPENSSL_ia32_rdseed_begin +.hidden OPENSSL_cpuid_setup +.hidden OPENSSL_ia32cap_P +.comm OPENSSL_ia32cap_P,16,4 .section .init call OPENSSL_cpuid_setup |