Diffstat (limited to 'secure/lib/libcrypto/amd64/sha1-x86_64.S')
-rw-r--r-- | secure/lib/libcrypto/amd64/sha1-x86_64.S | 1159
1 file changed, 1158 insertions(+), 1 deletion(-)
diff --git a/secure/lib/libcrypto/amd64/sha1-x86_64.S b/secure/lib/libcrypto/amd64/sha1-x86_64.S
index 421423a..eeb90e5 100644
--- a/secure/lib/libcrypto/amd64/sha1-x86_64.S
+++ b/secure/lib/libcrypto/amd64/sha1-x86_64.S
@@ -1,4 +1,5 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from sha1-x86_64.pl.
 .text
@@ -10,6 +11,11 @@ sha1_block_data_order:
 movl OPENSSL_ia32cap_P+4(%rip),%r8d
 testl $512,%r8d
 jz .Lialu
+ andl $268435456,%r8d
+ andl $1073741824,%r9d
+ orl %r9d,%r8d
+ cmpl $1342177280,%r8d
+ je _avx_shortcut
 jmp _ssse3_shortcut
 .align 16
@@ -2476,6 +2482,1157 @@ _ssse3_shortcut:
 .Lepilogue_ssse3:
 .byte 0xf3,0xc3
 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.type sha1_block_data_order_avx,@function
+.align 16
+sha1_block_data_order_avx:
+_avx_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+ vzeroupper
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+
+ vmovdqa 64(%r11),%xmm6
+ vmovdqa 0(%r11),%xmm9
+ vmovdqu 0(%r9),%xmm0
+ vmovdqu 16(%r9),%xmm1
+ vmovdqu 32(%r9),%xmm2
+ vmovdqu 48(%r9),%xmm3
+ vpshufb %xmm6,%xmm0,%xmm0
+ addq $64,%r9
+ vpshufb %xmm6,%xmm1,%xmm1
+ vpshufb %xmm6,%xmm2,%xmm2
+ vpshufb %xmm6,%xmm3,%xmm3
+ vpaddd %xmm9,%xmm0,%xmm4
+ vpaddd %xmm9,%xmm1,%xmm5
+ vpaddd %xmm9,%xmm2,%xmm6
+ vmovdqa %xmm4,0(%rsp)
+ vmovdqa %xmm5,16(%rsp)
+ vmovdqa %xmm6,32(%rsp)
+ jmp .Loop_avx
+.align 16
+.Loop_avx:
+ addl 0(%rsp),%ebp
+ xorl %edx,%ecx
+ vpalignr $8,%xmm0,%xmm1,%xmm4
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ vpaddd %xmm3,%xmm9,%xmm9
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ vpsrldq $4,%xmm3,%xmm8
+ xorl %edx,%esi
+ addl %eax,%ebp
+ vpxor %xmm0,%xmm4,%xmm4
+ shrdl $2,%ebx,%ebx
+ addl %esi,%ebp
+ vpxor %xmm2,%xmm8,%xmm8
+ addl 4(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ vpxor %xmm8,%xmm4,%xmm4
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ vmovdqa %xmm9,48(%rsp)
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ vpsrld $31,%xmm4,%xmm8
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 8(%rsp),%ecx
+ xorl %ebx,%eax
+ vpslldq $12,%xmm4,%xmm10
+ vpaddd %xmm4,%xmm4,%xmm4
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ andl %eax,%esi
+ xorl %ebx,%eax
+ vpsrld $30,%xmm10,%xmm9
+ vpor %xmm8,%xmm4,%xmm4
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vpslld $2,%xmm10,%xmm10
+ vpxor %xmm9,%xmm4,%xmm4
+ addl 12(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ vpxor %xmm10,%xmm4,%xmm4
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ vmovdqa 0(%r11),%xmm10
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ addl 16(%rsp),%eax
+ xorl %ebp,%edx
+ vpalignr $8,%xmm1,%xmm2,%xmm5
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ vpaddd %xmm4,%xmm10,%xmm10
+ andl %edx,%esi
+ xorl %ebp,%edx
+ vpsrldq $4,%xmm4,%xmm9
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ vpxor %xmm1,%xmm5,%xmm5
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ vpxor %xmm3,%xmm9,%xmm9
+ addl 20(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ vpxor %xmm9,%xmm5,%xmm5
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ vmovdqa %xmm10,0(%rsp)
+ xorl %edx,%edi
+ addl %eax,%ebp
+ vpsrld $31,%xmm5,%xmm9
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ addl 24(%rsp),%edx
+ xorl %ecx,%ebx
+ vpslldq $12,%xmm5,%xmm8
+ vpaddd %xmm5,%xmm5,%xmm5
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ vpsrld $30,%xmm8,%xmm10
+ vpor %xmm9,%xmm5,%xmm5
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ vpslld $2,%xmm8,%xmm8
+ vpxor %xmm10,%xmm5,%xmm5
+ addl 28(%rsp),%ecx
+ xorl %ebx,%eax
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ vpxor %xmm8,%xmm5,%xmm5
+ andl %eax,%edi
+ xorl %ebx,%eax
+ vmovdqa 16(%r11),%xmm8
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ addl 32(%rsp),%ebx
+ xorl %eax,%ebp
+ vpalignr $8,%xmm2,%xmm3,%xmm6
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ vpaddd %xmm5,%xmm8,%xmm8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ vpsrldq $4,%xmm5,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ vpxor %xmm2,%xmm6,%xmm6
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ vpxor %xmm4,%xmm10,%xmm10
+ addl 36(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ vpxor %xmm10,%xmm6,%xmm6
+ andl %edx,%edi
+ xorl %ebp,%edx
+ vmovdqa %xmm8,16(%rsp)
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ vpsrld $31,%xmm6,%xmm10
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ addl 40(%rsp),%ebp
+ xorl %edx,%ecx
+ vpslldq $12,%xmm6,%xmm9
+ vpaddd %xmm6,%xmm6,%xmm6
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ vpsrld $30,%xmm9,%xmm8
+ vpor %xmm10,%xmm6,%xmm6
+ xorl %edx,%esi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ vpslld $2,%xmm9,%xmm9
+ vpxor %xmm8,%xmm6,%xmm6
+ addl 44(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ vpxor %xmm9,%xmm6,%xmm6
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ vmovdqa 16(%r11),%xmm9
+ xorl %ecx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 48(%rsp),%ecx
+ xorl %ebx,%eax
+ vpalignr $8,%xmm3,%xmm4,%xmm7
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ vpaddd %xmm6,%xmm9,%xmm9
+ andl %eax,%esi
+ xorl %ebx,%eax
+ vpsrldq $4,%xmm6,%xmm8
+ xorl %ebx,%esi
+ addl %edx,%ecx
+ vpxor %xmm3,%xmm7,%xmm7
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vpxor %xmm5,%xmm8,%xmm8
+ addl 52(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ vpxor %xmm8,%xmm7,%xmm7
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ vmovdqa %xmm9,32(%rsp)
+ xorl %eax,%edi
+ addl %ecx,%ebx
+ vpsrld $31,%xmm7,%xmm8
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ addl 56(%rsp),%eax
+ xorl %ebp,%edx
+ vpslldq $12,%xmm7,%xmm10
+ vpaddd %xmm7,%xmm7,%xmm7
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ andl %edx,%esi
+ xorl %ebp,%edx
+ vpsrld $30,%xmm10,%xmm9
+ vpor %xmm8,%xmm7,%xmm7
+ xorl %ebp,%esi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ vpslld $2,%xmm10,%xmm10
+ vpxor %xmm9,%xmm7,%xmm7
+ addl 60(%rsp),%ebp
+ xorl %edx,%ecx
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ vpxor %xmm10,%xmm7,%xmm7
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ vmovdqa 16(%r11),%xmm10
+ xorl %edx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ vpalignr $8,%xmm6,%xmm7,%xmm9
+ vpxor %xmm4,%xmm0,%xmm0
+ addl 0(%rsp),%edx
+ xorl %ecx,%ebx
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ vpxor %xmm1,%xmm0,%xmm0
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ vmovdqa %xmm10,%xmm8
+ vpaddd %xmm7,%xmm10,%xmm10
+ xorl %ecx,%esi
+ addl %ebp,%edx
+ vpxor %xmm9,%xmm0,%xmm0
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ addl 4(%rsp),%ecx
+ xorl %ebx,%eax
+ vpsrld $30,%xmm0,%xmm9
+ vmovdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ vpslld $2,%xmm0,%xmm0
+ xorl %ebx,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ addl 8(%rsp),%ebx
+ xorl %eax,%ebp
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ vpor %xmm9,%xmm0,%xmm0
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ vmovdqa %xmm0,%xmm10
+ xorl %eax,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edx
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ xorl %ebp,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ vpalignr $8,%xmm7,%xmm0,%xmm10
+ vpxor %xmm5,%xmm1,%xmm1
+ addl 16(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ vpxor %xmm2,%xmm1,%xmm1
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ vmovdqa %xmm8,%xmm9
+ vpaddd %xmm0,%xmm8,%xmm8
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ vpxor %xmm10,%xmm1,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ vpsrld $30,%xmm1,%xmm10
+ vmovdqa %xmm8,0(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpslld $2,%xmm1,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vpor %xmm10,%xmm1,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %eax,%edi
+ vmovdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ vpalignr $8,%xmm0,%xmm1,%xmm8
+ vpxor %xmm6,%xmm2,%xmm2
+ addl 32(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ vpxor %xmm3,%xmm2,%xmm2
+ xorl %edx,%esi
+ addl %ebx,%eax
+ vmovdqa 32(%r11),%xmm10
+ vpaddd %xmm1,%xmm9,%xmm9
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ vpxor %xmm8,%xmm2,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ vpsrld $30,%xmm2,%xmm8
+ vmovdqa %xmm9,16(%rsp)
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ vpslld $2,%xmm2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ vpor %xmm8,%xmm2,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %ebx,%edi
+ vmovdqa %xmm2,%xmm9
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ vpalignr $8,%xmm1,%xmm2,%xmm9
+ vpxor %xmm7,%xmm3,%xmm3
+ addl 48(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ vpxor %xmm4,%xmm3,%xmm3
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ vmovdqa %xmm10,%xmm8
+ vpaddd %xmm2,%xmm10,%xmm10
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ vpxor %xmm9,%xmm3,%xmm3
+ addl 52(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ vpsrld $30,%xmm3,%xmm9
+ vmovdqa %xmm10,32(%rsp)
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ vpslld $2,%xmm3,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ vpor %xmm9,%xmm3,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ecx,%edi
+ vmovdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpalignr $8,%xmm2,%xmm3,%xmm10
+ vpxor %xmm0,%xmm4,%xmm4
+ addl 0(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ vpxor %xmm5,%xmm4,%xmm4
+ xorl %eax,%esi
+ addl %edx,%ecx
+ vmovdqa %xmm8,%xmm9
+ vpaddd %xmm3,%xmm8,%xmm8
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vpxor %xmm10,%xmm4,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ vpsrld $30,%xmm4,%xmm10
+ vmovdqa %xmm8,48(%rsp)
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ vpslld $2,%xmm4,%xmm4
+ addl 8(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ vpor %xmm10,%xmm4,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %edx,%edi
+ vmovdqa %xmm4,%xmm8
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ vpalignr $8,%xmm3,%xmm4,%xmm8
+ vpxor %xmm1,%xmm5,%xmm5
+ addl 16(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ vpxor %xmm6,%xmm5,%xmm5
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ vmovdqa %xmm9,%xmm10
+ vpaddd %xmm4,%xmm9,%xmm9
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ vpxor %xmm8,%xmm5,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ vpsrld $30,%xmm5,%xmm8
+ vmovdqa %xmm9,0(%rsp)
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ vpslld $2,%xmm5,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ vpor %xmm8,%xmm5,%xmm5
+ addl 28(%rsp),%eax
+ xorl %ebp,%edi
+ vmovdqa %xmm5,%xmm9
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ vpalignr $8,%xmm4,%xmm5,%xmm9
+ vpxor %xmm2,%xmm6,%xmm6
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ addl 32(%rsp),%ebp
+ andl %edx,%edi
+ vpxor %xmm7,%xmm6,%xmm6
+ andl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ vmovdqa %xmm10,%xmm8
+ vpaddd %xmm5,%xmm10,%xmm10
+ addl %edi,%ebp
+ movl %eax,%edi
+ vpxor %xmm9,%xmm6,%xmm6
+ shldl $5,%eax,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ vpsrld $30,%xmm6,%xmm9
+ vmovdqa %xmm10,16(%rsp)
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 36(%rsp),%edx
+ andl %ecx,%esi
+ vpslld $2,%xmm6,%xmm6
+ andl %ebx,%edi
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ vpor %xmm9,%xmm6,%xmm6
+ movl %eax,%edi
+ xorl %ebx,%eax
+ vmovdqa %xmm6,%xmm10
+ addl 40(%rsp),%ecx
+ andl %ebx,%edi
+ andl %eax,%esi
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 44(%rsp),%ebx
+ andl %eax,%esi
+ andl %ebp,%edi
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ vpalignr $8,%xmm5,%xmm6,%xmm10
+ vpxor %xmm3,%xmm7,%xmm7
+ movl %edx,%edi
+ xorl %ebp,%edx
+ addl 48(%rsp),%eax
+ andl %ebp,%edi
+ vpxor %xmm0,%xmm7,%xmm7
+ andl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ vmovdqa 48(%r11),%xmm9
+ vpaddd %xmm6,%xmm8,%xmm8
+ addl %edi,%eax
+ movl %ebx,%edi
+ vpxor %xmm10,%xmm7,%xmm7
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ vpsrld $30,%xmm7,%xmm10
+ vmovdqa %xmm8,32(%rsp)
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 52(%rsp),%ebp
+ andl %edx,%esi
+ vpslld $2,%xmm7,%xmm7
+ andl %ecx,%edi
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ vpor %xmm10,%xmm7,%xmm7
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ vmovdqa %xmm7,%xmm8
+ addl 56(%rsp),%edx
+ andl %ecx,%edi
+ andl %ebx,%esi
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 60(%rsp),%ecx
+ andl %ebx,%esi
+ andl %eax,%edi
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ vpalignr $8,%xmm6,%xmm7,%xmm8
+ vpxor %xmm4,%xmm0,%xmm0
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ addl 0(%rsp),%ebx
+ andl %eax,%edi
+ vpxor %xmm1,%xmm0,%xmm0
+ andl %ebp,%esi
+ shrdl $7,%edx,%edx
+ vmovdqa %xmm9,%xmm10
+ vpaddd %xmm7,%xmm9,%xmm9
+ addl %edi,%ebx
+ movl %ecx,%edi
+ vpxor %xmm8,%xmm0,%xmm0
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ vpsrld $30,%xmm0,%xmm8
+ vmovdqa %xmm9,48(%rsp)
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 4(%rsp),%eax
+ andl %ebp,%esi
+ vpslld $2,%xmm0,%xmm0
+ andl %edx,%edi
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ vpor %xmm8,%xmm0,%xmm0
+ movl %ecx,%edi
+ xorl %edx,%ecx
+ vmovdqa %xmm0,%xmm9
+ addl 8(%rsp),%ebp
+ andl %edx,%edi
+ andl %ecx,%esi
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ addl %esi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ movl %ebx,%esi
+ xorl %ecx,%ebx
+ addl 12(%rsp),%edx
+ andl %ecx,%esi
+ andl %ebx,%edi
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ addl %edi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ vpalignr $8,%xmm7,%xmm0,%xmm9
+ vpxor %xmm5,%xmm1,%xmm1
+ movl %eax,%edi
+ xorl %ebx,%eax
+ addl 16(%rsp),%ecx
+ andl %ebx,%edi
+ vpxor %xmm2,%xmm1,%xmm1
+ andl %eax,%esi
+ shrdl $7,%ebp,%ebp
+ vmovdqa %xmm10,%xmm8
+ vpaddd %xmm0,%xmm10,%xmm10
+ addl %edi,%ecx
+ movl %edx,%edi
+ vpxor %xmm9,%xmm1,%xmm1
+ shldl $5,%edx,%edx
+ addl %esi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ vpsrld $30,%xmm1,%xmm9
+ vmovdqa %xmm10,0(%rsp)
+ movl %ebp,%esi
+ xorl %eax,%ebp
+ addl 20(%rsp),%ebx
+ andl %eax,%esi
+ vpslld $2,%xmm1,%xmm1
+ andl %ebp,%edi
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ addl %edi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ vpor %xmm9,%xmm1,%xmm1
+ movl %edx,%edi
+ xorl %ebp,%edx
+ vmovdqa %xmm1,%xmm10
+ addl 24(%rsp),%eax
+ andl %ebp,%edi
+ andl %edx,%esi
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ addl %esi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ movl %ecx,%esi
+ xorl %edx,%ecx
+ addl 28(%rsp),%ebp
+ andl %edx,%esi
+ andl %ecx,%edi
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ addl %edi,%ebp
+ xorl %edx,%ecx
+ addl %eax,%ebp
+ vpalignr $8,%xmm0,%xmm1,%xmm10
+ vpxor %xmm6,%xmm2,%xmm2
+ movl %ebx,%edi
+ xorl %ecx,%ebx
+ addl 32(%rsp),%edx
+ andl %ecx,%edi
+ vpxor %xmm3,%xmm2,%xmm2
+ andl %ebx,%esi
+ shrdl $7,%eax,%eax
+ vmovdqa %xmm8,%xmm9
+ vpaddd %xmm1,%xmm8,%xmm8
+ addl %edi,%edx
+ movl %ebp,%edi
+ vpxor %xmm10,%xmm2,%xmm2
+ shldl $5,%ebp,%ebp
+ addl %esi,%edx
+ xorl %ecx,%ebx
+ addl %ebp,%edx
+ vpsrld $30,%xmm2,%xmm10
+ vmovdqa %xmm8,16(%rsp)
+ movl %eax,%esi
+ xorl %ebx,%eax
+ addl 36(%rsp),%ecx
+ andl %ebx,%esi
+ vpslld $2,%xmm2,%xmm2
+ andl %eax,%edi
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ addl %edi,%ecx
+ xorl %ebx,%eax
+ addl %edx,%ecx
+ vpor %xmm10,%xmm2,%xmm2
+ movl %ebp,%edi
+ xorl %eax,%ebp
+ vmovdqa %xmm2,%xmm8
+ addl 40(%rsp),%ebx
+ andl %eax,%edi
+ andl %ebp,%esi
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ addl %esi,%ebx
+ xorl %eax,%ebp
+ addl %ecx,%ebx
+ movl %edx,%esi
+ xorl %ebp,%edx
+ addl 44(%rsp),%eax
+ andl %ebp,%esi
+ andl %edx,%edi
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ addl %edi,%eax
+ xorl %ebp,%edx
+ addl %ebx,%eax
+ vpalignr $8,%xmm1,%xmm2,%xmm8
+ vpxor %xmm7,%xmm3,%xmm3
+ addl 48(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ vpxor %xmm4,%xmm3,%xmm3
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ vmovdqa %xmm9,%xmm10
+ vpaddd %xmm2,%xmm9,%xmm9
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ vpxor %xmm8,%xmm3,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ vpsrld $30,%xmm3,%xmm8
+ vmovdqa %xmm9,32(%rsp)
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ vpslld $2,%xmm3,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vpor %xmm8,%xmm3,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ addl 0(%rsp),%eax
+ vpaddd %xmm3,%xmm10,%xmm10
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%esi
+ movdqa %xmm10,48(%rsp)
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ addl 4(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ addl 8(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ addl 12(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ cmpq %r10,%r9
+ je .Ldone_avx
+ vmovdqa 64(%r11),%xmm6
+ vmovdqa 0(%r11),%xmm9
+ vmovdqu 0(%r9),%xmm0
+ vmovdqu 16(%r9),%xmm1
+ vmovdqu 32(%r9),%xmm2
+ vmovdqu 48(%r9),%xmm3
+ vpshufb %xmm6,%xmm0,%xmm0
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ vpshufb %xmm6,%xmm1,%xmm1
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ vpaddd %xmm9,%xmm0,%xmm4
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ vmovdqa %xmm4,0(%rsp)
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ vpshufb %xmm6,%xmm2,%xmm2
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ vpaddd %xmm9,%xmm1,%xmm5
+ xorl %eax,%esi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ vmovdqa %xmm5,16(%rsp)
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ vpshufb %xmm6,%xmm3,%xmm3
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ vpaddd %xmm9,%xmm2,%xmm6
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ vmovdqa %xmm6,32(%rsp)
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ jmp .Loop_avx
+
+.align 16
+.Ldone_avx:
+ addl 16(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ addl 20(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ addl 24(%rsp),%ebp
+ xorl %edx,%esi
+ movl %eax,%edi
+ shldl $5,%eax,%eax
+ xorl %ecx,%esi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %esi,%ebp
+ addl 28(%rsp),%edx
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%edi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %edi,%edx
+ addl 32(%rsp),%ecx
+ xorl %ebx,%esi
+ movl %edx,%edi
+ shldl $5,%edx,%edx
+ xorl %eax,%esi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %esi,%ecx
+ addl 36(%rsp),%ebx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%edi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %edi,%ebx
+ addl 40(%rsp),%eax
+ xorl %ebp,%esi
+ movl %ebx,%edi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%esi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %esi,%eax
+ addl 44(%rsp),%ebp
+ xorl %edx,%edi
+ movl %eax,%esi
+ shldl $5,%eax,%eax
+ xorl %ecx,%edi
+ addl %eax,%ebp
+ shrdl $7,%ebx,%ebx
+ addl %edi,%ebp
+ addl 48(%rsp),%edx
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ shldl $5,%ebp,%ebp
+ xorl %ebx,%esi
+ addl %ebp,%edx
+ shrdl $7,%eax,%eax
+ addl %esi,%edx
+ addl 52(%rsp),%ecx
+ xorl %ebx,%edi
+ movl %edx,%esi
+ shldl $5,%edx,%edx
+ xorl %eax,%edi
+ addl %edx,%ecx
+ shrdl $7,%ebp,%ebp
+ addl %edi,%ecx
+ addl 56(%rsp),%ebx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ shldl $5,%ecx,%ecx
+ xorl %ebp,%esi
+ addl %ecx,%ebx
+ shrdl $7,%edx,%edx
+ addl %esi,%ebx
+ addl 60(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ shldl $5,%ebx,%ebx
+ xorl %edx,%edi
+ addl %ebx,%eax
+ shrdl $7,%ecx,%ecx
+ addl %edi,%eax
+ vzeroupper
+
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_avx:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
 .align 64
 K_XX_XX:
 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
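
For readers who do not want to decode the OPENSSL_ia32cap_P bit tests in the first hunk, the following is a minimal C sketch of the dispatch this commit adds at the top of sha1_block_data_order. The masks are taken directly from the diff (0x200 = SSSE3, 0x10000000 = AVX in the CPUID-leaf-1 ECX word, 0x40000000 in the first word); the meaning of that last bit (OpenSSL's "Intel CPU" flag) is stated here as an assumption, and the helper name sha1_pick_block_fn, the typedef, and the sha1_block_data_order_ialu name for the .Lialu integer path are illustrative only, not symbols defined by this file.

#include <stddef.h>
#include <stdint.h>

/* Capability vector filled in by OPENSSL_cpuid_setup(); word 0 holds the
 * EDX-derived flags (plus OpenSSL's own bits), word 1 the ECX-derived flags
 * from CPUID leaf 1. */
extern unsigned int OPENSSL_ia32cap_P[2];

/* Roughly the block-function shape used by OpenSSL's SHA-1 code: hash state,
 * input pointer, number of 64-byte blocks. */
typedef void sha1_block_fn(uint32_t *state, const void *data, size_t num);

sha1_block_fn sha1_block_data_order_ssse3; /* existing _ssse3_shortcut path */
sha1_block_fn sha1_block_data_order_avx;   /* _avx_shortcut path added by this commit */
sha1_block_fn sha1_block_data_order_ialu;  /* hypothetical name for the .Lialu integer path */

/* Hypothetical helper mirroring the dispatch at the top of sha1_block_data_order. */
static sha1_block_fn *sha1_pick_block_fn(void)
{
    uint32_t w0 = OPENSSL_ia32cap_P[0];
    uint32_t w1 = OPENSSL_ia32cap_P[1];

    if ((w1 & 0x200) == 0)                  /* testl $512,%r8d: no SSSE3 */
        return sha1_block_data_order_ialu;  /* jz .Lialu */

    /* andl $0x10000000,%r8d; andl $0x40000000,%r9d; orl; cmpl $0x50000000 */
    if (((w1 & 0x10000000) | (w0 & 0x40000000)) == 0x50000000)
        return sha1_block_data_order_avx;   /* je _avx_shortcut */

    return sha1_block_data_order_ssse3;     /* jmp _ssse3_shortcut */
}

In other words, the AVX routine is reached only when both masked bits are set; a CPU with AVX but without the 0x40000000 flag in the first capability word still falls through to the SSSE3 code.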