diff options
Diffstat (limited to 'secure/lib/libcrypto/i386/aesni-x86.s')
-rw-r--r-- | secure/lib/libcrypto/i386/aesni-x86.s | 2449 |
1 files changed, 2449 insertions, 0 deletions
diff --git a/secure/lib/libcrypto/i386/aesni-x86.s b/secure/lib/libcrypto/i386/aesni-x86.s new file mode 100644 index 0000000..5b294bd --- /dev/null +++ b/secure/lib/libcrypto/i386/aesni-x86.s @@ -0,0 +1,2449 @@ + # $FreeBSD$ +.file "aesni-x86.s" +.text +.globl aesni_encrypt +.type aesni_encrypt,@function +.align 16 +aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aesni_encrypt,.-.L_aesni_encrypt_begin +.globl aesni_decrypt +.type aesni_decrypt,@function +.align 16 +aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.size aesni_decrypt,.-.L_aesni_decrypt_begin +.type _aesni_encrypt2,@function +.align 16 +_aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.size _aesni_encrypt2,.-_aesni_encrypt2 +.type _aesni_decrypt2,@function +.align 16 +_aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.size _aesni_decrypt2,.-_aesni_decrypt2 +.type _aesni_encrypt3,@function +.align 16 +_aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.size _aesni_encrypt3,.-_aesni_encrypt3 +.type _aesni_decrypt3,@function +.align 16 +_aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.size _aesni_decrypt3,.-_aesni_decrypt3 +.type _aesni_encrypt4,@function +.align 16 +_aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.size _aesni_encrypt4,.-_aesni_encrypt4 +.type _aesni_decrypt4,@function +.align 16 +_aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.size _aesni_decrypt4,.-_aesni_decrypt4 +.type _aesni_encrypt6,@function +.align 16 +_aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L008_aesni_encrypt6_inner +.align 16 +.L009enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.size _aesni_encrypt6,.-_aesni_encrypt6 +.type _aesni_decrypt6,@function +.align 16 +_aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L010_aesni_decrypt6_inner +.align 16 +.L011dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L011dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.size _aesni_decrypt6,.-_aesni_decrypt6 +.globl aesni_ecb_encrypt +.type aesni_ecb_encrypt,@function +.align 16 +aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L012ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L013ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L014ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L015ecb_enc_loop6_enter +.align 16 +.L016ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L015ecb_enc_loop6_enter: + call _aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L016ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L014ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L017ecb_enc_one + movups 16(%esi),%xmm3 + je .L018ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L019ecb_enc_three + movups 48(%esi),%xmm5 + je .L020ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L017ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L021enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L021enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L018ecb_enc_two: + call _aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L019ecb_enc_three: + call _aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L020ecb_enc_four: + call _aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L012ecb_ret +.align 16 +.L013ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L022ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L023ecb_dec_loop6_enter +.align 16 +.L024ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L023ecb_dec_loop6_enter: + call _aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L024ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L022ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L025ecb_dec_one + movups 16(%esi),%xmm3 + je .L026ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L027ecb_dec_three + movups 48(%esi),%xmm5 + je .L028ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L025ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L029dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L029dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L026ecb_dec_two: + call _aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L027ecb_dec_three: + call _aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L028ecb_dec_four: + call _aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin +.globl aesni_ccm64_encrypt_blocks +.type aesni_ccm64_encrypt_blocks,@function +.align 16 +aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +.L030ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +.L031ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L031ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz .L030ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin +.globl aesni_ccm64_decrypt_blocks +.type aesni_ccm64_decrypt_blocks,@function +.align 16 +aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L032enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L033ccm64_dec_outer +.align 16 +.L033ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L034ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +.L035ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L035ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp .L033ccm64_dec_outer +.align 16 +.L034ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L036enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L036enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin +.globl aesni_ctr32_encrypt_blocks +.type aesni_ctr32_encrypt_blocks,@function +.align 16 +aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L037ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb .L038ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp .L039ctr32_loop6 +.align 16 +.L039ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc .L039ctr32_loop6 + addl $6,%eax + jz .L040ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L038ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L041ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je .L042ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L043ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je .L044ctr32_four + por %xmm7,%xmm6 + call _aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L040ctr32_ret +.align 16 +.L037ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L041ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L045enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L045enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L040ctr32_ret +.align 16 +.L042ctr32_two: + call _aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L040ctr32_ret +.align 16 +.L043ctr32_three: + call _aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L040ctr32_ret +.align 16 +.L044ctr32_four: + call _aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin +.globl aesni_xts_encrypt +.type aesni_xts_encrypt,@function +.align 16 +aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L046enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L046enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L047xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L048xts_enc_loop6 +.align 16 +.L048xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L048xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L047xts_enc_short: + addl $96,%eax + jz .L049xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L050xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L051xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L052xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L053xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L054xts_enc_done +.align 16 +.L050xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L055enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L055enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L051xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L052xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L053xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L049xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L056xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L057xts_enc_steal +.align 16 +.L054xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L056xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L057xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L057xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L058enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L058enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin +.globl aesni_xts_decrypt +.type aesni_xts_decrypt,@function +.align 16 +aesni_xts_decrypt: +.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L059enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L059enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L060xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L061xts_dec_loop6 +.align 16 +.L061xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L061xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L060xts_dec_short: + addl $96,%eax + jz .L062xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L063xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L064xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L065xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L066xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call _aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L067xts_dec_done +.align 16 +.L063xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L068dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L068dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L064xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call _aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L065xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call _aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L066xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call _aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L062xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L069xts_dec_ret + movl %eax,112(%esp) + jmp .L070xts_dec_only_one_more +.align 16 +.L067xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L069xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L070xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L071dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L071dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L072xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L072xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L073dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L073dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin +.globl aesni_cbc_encrypt +.type aesni_cbc_encrypt,@function +.align 16 +aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L074cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L075cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L076cbc_enc_tail + subl $16,%eax + jmp .L077cbc_enc_loop +.align 16 +.L077cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L078enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L078enc1_loop_15 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L077cbc_enc_loop + addl $16,%eax + jnz .L076cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp .L079cbc_ret +.L076cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L077cbc_enc_loop +.align 16 +.L075cbc_decrypt: + cmpl $80,%eax + jbe .L080cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L081cbc_dec_loop6_enter +.align 16 +.L082cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L081cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L082cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L083cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L080cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L084cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L085cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L086cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L087cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call _aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L084cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L089dec1_loop_16: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L089dec1_loop_16 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L085cbc_dec_two: + call _aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L086cbc_dec_three: + call _aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L087cbc_dec_four: + call _aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp .L088cbc_dec_tail_collected +.align 16 +.L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L088cbc_dec_tail_collected: + andl $15,%eax + jnz .L090cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp .L079cbc_ret +.align 16 +.L090cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +.L079cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +.L074cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin +.type _aesni_set_encrypt_key,@function +.align 16 +_aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz .L091bad_pointer + testl %edx,%edx + jz .L091bad_pointer + call .L092pic +.L092pic: + popl %ebx + leal .Lkey_const-.L092pic(%ebx),%ebx + leal OPENSSL_ia32cap_P,%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je .L09314rounds + cmpl $192,%ecx + je .L09412rounds + cmpl $128,%ecx + jne .L095bad_keybits +.align 16 +.L09610rounds: + cmpl $268435456,%ebp + je .L09710rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L098key_128_cold +.byte 102,15,58,223,200,2 + call .L099key_128 +.byte 102,15,58,223,200,4 + call .L099key_128 +.byte 102,15,58,223,200,8 + call .L099key_128 +.byte 102,15,58,223,200,16 + call .L099key_128 +.byte 102,15,58,223,200,32 + call .L099key_128 +.byte 102,15,58,223,200,64 + call .L099key_128 +.byte 102,15,58,223,200,128 + call .L099key_128 +.byte 102,15,58,223,200,27 + call .L099key_128 +.byte 102,15,58,223,200,54 + call .L099key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp .L100good_key +.align 16 +.L099key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L098key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L100good_key +.align 16 +.L09412rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10212rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L103key_192a_cold +.byte 102,15,58,223,202,2 + call .L104key_192b +.byte 102,15,58,223,202,4 + call .L105key_192a +.byte 102,15,58,223,202,8 + call .L104key_192b +.byte 102,15,58,223,202,16 + call .L105key_192a +.byte 102,15,58,223,202,32 + call .L104key_192b +.byte 102,15,58,223,202,64 + call .L105key_192a +.byte 102,15,58,223,202,128 + call .L104key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp .L100good_key +.align 16 +.L105key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L103key_192a_cold: + movaps %xmm2,%xmm5 +.L106key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L104key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L106key_192b_warm +.align 16 +.L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L100good_key +.align 16 +.L09314rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L10814rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L109key_256a_cold +.byte 102,15,58,223,200,1 + call .L110key_256b +.byte 102,15,58,223,202,2 + call .L111key_256a +.byte 102,15,58,223,200,2 + call .L110key_256b +.byte 102,15,58,223,202,4 + call .L111key_256a +.byte 102,15,58,223,200,4 + call .L110key_256b +.byte 102,15,58,223,202,8 + call .L111key_256a +.byte 102,15,58,223,200,8 + call .L110key_256b +.byte 102,15,58,223,202,16 + call .L111key_256a +.byte 102,15,58,223,200,16 + call .L110key_256b +.byte 102,15,58,223,202,32 + call .L111key_256a +.byte 102,15,58,223,200,32 + call .L110key_256b +.byte 102,15,58,223,202,64 + call .L111key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp .L100good_key +.align 16 +.L111key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L109key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L110key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 16 +.L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L112loop_key256 +.L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L091bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L095bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key +.globl aesni_set_encrypt_key +.type aesni_set_encrypt_key,@function +.align 16 +aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + ret +.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin +.globl aesni_set_decrypt_key +.type aesni_set_decrypt_key,@function +.align 16 +aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call _aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L114dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L115dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L115dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +.L114dec_key_ret: + ret +.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 |