Diffstat (limited to 'secure/lib/libcrypto/i386/x86-mont.S')
-rw-r--r-- | secure/lib/libcrypto/i386/x86-mont.S | 224
1 file changed, 119 insertions(+), 105 deletions(-)
diff --git a/secure/lib/libcrypto/i386/x86-mont.S b/secure/lib/libcrypto/i386/x86-mont.S
index 67431fb..2d59719 100644
--- a/secure/lib/libcrypto/i386/x86-mont.S
+++ b/secure/lib/libcrypto/i386/x86-mont.S
@@ -18,47 +18,54 @@ bn_mul_mont:
 	jl	.L000just_leave
 	leal	20(%esp),%esi
 	leal	24(%esp),%edx
-	movl	%esp,%ebp
 	addl	$2,%edi
 	negl	%edi
-	leal	-32(%esp,%edi,4),%esp
+	leal	-32(%esp,%edi,4),%ebp
 	negl	%edi
-	movl	%esp,%eax
+	movl	%ebp,%eax
 	subl	%edx,%eax
 	andl	$2047,%eax
-	subl	%eax,%esp
-	xorl	%esp,%edx
+	subl	%eax,%ebp
+	xorl	%ebp,%edx
 	andl	$2048,%edx
 	xorl	$2048,%edx
-	subl	%edx,%esp
-	andl	$-64,%esp
-	movl	%ebp,%eax
-	subl	%esp,%eax
+	subl	%edx,%ebp
+	andl	$-64,%ebp
+	movl	%esp,%eax
+	subl	%ebp,%eax
 	andl	$-4096,%eax
+	movl	%esp,%edx
+	leal	(%ebp,%eax,1),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+	jmp	.L002page_walk_done
+.align	16
 .L001page_walk:
-	movl	(%esp,%eax,1),%edx
-	subl	$4096,%eax
-.byte	46
-	jnc	.L001page_walk
+	leal	-4096(%esp),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+.L002page_walk_done:
 	movl	(%esi),%eax
 	movl	4(%esi),%ebx
 	movl	8(%esi),%ecx
-	movl	12(%esi),%edx
+	movl	12(%esi),%ebp
 	movl	16(%esi),%esi
 	movl	(%esi),%esi
 	movl	%eax,4(%esp)
 	movl	%ebx,8(%esp)
 	movl	%ecx,12(%esp)
-	movl	%edx,16(%esp)
+	movl	%ebp,16(%esp)
 	movl	%esi,20(%esp)
 	leal	-3(%edi),%ebx
-	movl	%ebp,24(%esp)
-	call	.L002PIC_me_up
-.L002PIC_me_up:
+	movl	%edx,24(%esp)
+	call	.L003PIC_me_up
+.L003PIC_me_up:
 	popl	%eax
-	leal	OPENSSL_ia32cap_P-.L002PIC_me_up(%eax),%eax
+	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
 	btl	$26,(%eax)
-	jnc	.L003non_sse2
+	jnc	.L004non_sse2
 	movl	$-1,%eax
 	movd	%eax,%mm7
 	movl	8(%esp),%esi
@@ -82,7 +89,7 @@ bn_mul_mont:
 	psrlq	$32,%mm3
 	incl	%ecx
 .align	16
-.L0041st:
+.L0051st:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -97,7 +104,7 @@ bn_mul_mont:
 	psrlq	$32,%mm3
 	leal	1(%ecx),%ecx
 	cmpl	%ebx,%ecx
-	jl	.L0041st
+	jl	.L0051st
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -111,7 +118,7 @@ bn_mul_mont:
 	paddq	%mm2,%mm3
 	movq	%mm3,32(%esp,%ebx,4)
 	incl	%edx
-.L005outer:
+.L006outer:
 	xorl	%ecx,%ecx
 	movd	(%edi,%edx,4),%mm4
 	movd	(%esi),%mm5
@@ -133,7 +140,7 @@ bn_mul_mont:
 	paddq	%mm6,%mm2
 	incl	%ecx
 	decl	%ebx
-.L006inner:
+.L007inner:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -150,7 +157,7 @@ bn_mul_mont:
 	paddq	%mm6,%mm2
 	decl	%ebx
 	leal	1(%ecx),%ecx
-	jnz	.L006inner
+	jnz	.L007inner
 	movl	%ecx,%ebx
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
@@ -168,11 +175,11 @@ bn_mul_mont:
 	movq	%mm3,32(%esp,%ebx,4)
 	leal	1(%edx),%edx
 	cmpl	%ebx,%edx
-	jle	.L005outer
+	jle	.L006outer
 	emms
-	jmp	.L007common_tail
+	jmp	.L008common_tail
 .align	16
-.L003non_sse2:
+.L004non_sse2:
 	movl	8(%esp),%esi
 	leal	1(%ebx),%ebp
 	movl	12(%esp),%edi
@@ -183,12 +190,12 @@ bn_mul_mont:
 	leal	4(%edi,%ebx,4),%eax
 	orl	%edx,%ebp
 	movl	(%edi),%edi
-	jz	.L008bn_sqr_mont
+	jz	.L009bn_sqr_mont
 	movl	%eax,28(%esp)
 	movl	(%esi),%eax
 	xorl	%edx,%edx
 .align	16
-.L009mull:
+.L010mull:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	%eax,%ebp
@@ -197,7 +204,7 @@ bn_mul_mont:
 	movl	(%esi,%ecx,4),%eax
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L009mull
+	jl	.L010mull
 	movl	%edx,%ebp
 	mull	%edi
 	movl	20(%esp),%edi
@@ -215,9 +222,9 @@ bn_mul_mont:
 	movl	4(%esi),%eax
 	adcl	$0,%edx
 	incl	%ecx
-	jmp	.L0102ndmadd
+	jmp	.L0112ndmadd
 .align	16
-.L0111stmadd:
+.L0121stmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -228,7 +235,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L0111stmadd
+	jl	.L0121stmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%eax
@@ -251,7 +258,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	movl	$1,%ecx
 .align	16
-.L0102ndmadd:
+.L0112ndmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -262,7 +269,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	.L0102ndmadd
+	jl	.L0112ndmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -278,16 +285,16 @@ bn_mul_mont:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	28(%esp),%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	.L007common_tail
+	je	.L008common_tail
 	movl	(%ecx),%edi
 	movl	8(%esp),%esi
 	movl	%ecx,12(%esp)
 	xorl	%ecx,%ecx
 	xorl	%edx,%edx
 	movl	(%esi),%eax
-	jmp	.L0111stmadd
+	jmp	.L0121stmadd
 .align	16
-.L008bn_sqr_mont:
+.L009bn_sqr_mont:
 	movl	%ebx,(%esp)
 	movl	%ecx,12(%esp)
 	movl	%edi,%eax
@@ -298,7 +305,7 @@ bn_mul_mont:
 	andl	$1,%ebx
 	incl	%ecx
 .align	16
-.L012sqr:
+.L013sqr:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -310,7 +317,7 @@ bn_mul_mont:
 	cmpl	(%esp),%ecx
 	movl	%eax,%ebx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L012sqr
+	jl	.L013sqr
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -334,7 +341,7 @@ bn_mul_mont:
 	movl	4(%esi),%eax
 	movl	$1,%ecx
 .align	16
-.L0133rdmadd:
+.L0143rdmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -353,7 +360,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	.L0133rdmadd
+	jl	.L0143rdmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -369,7 +376,7 @@ bn_mul_mont:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	%ebx,%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	.L007common_tail
+	je	.L008common_tail
 	movl	4(%esi,%ecx,4),%edi
 	leal	1(%ecx),%ecx
 	movl	%edi,%eax
@@ -381,12 +388,12 @@ bn_mul_mont:
 	xorl	%ebp,%ebp
 	cmpl	%ebx,%ecx
 	leal	1(%ecx),%ecx
-	je	.L014sqrlast
+	je	.L015sqrlast
 	movl	%edx,%ebx
 	shrl	$1,%edx
 	andl	$1,%ebx
 .align	16
-.L015sqradd:
+.L016sqradd:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -402,13 +409,13 @@ bn_mul_mont:
 	cmpl	(%esp),%ecx
 	movl	%ebp,28(%esp,%ecx,4)
 	movl	%eax,%ebx
-	jle	.L015sqradd
+	jle	.L016sqradd
 	movl	%edx,%ebp
 	addl	%edx,%edx
 	shrl	$31,%ebp
 	addl	%ebx,%edx
 	adcl	$0,%ebp
-.L014sqrlast:
+.L015sqrlast:
 	movl	20(%esp),%edi
 	movl	16(%esp),%esi
 	imull	32(%esp),%edi
@@ -423,9 +430,9 @@ bn_mul_mont:
 	adcl	$0,%edx
 	movl	$1,%ecx
 	movl	4(%esi),%eax
-	jmp	.L0133rdmadd
+	jmp	.L0143rdmadd
 .align	16
-.L007common_tail:
+.L008common_tail:
 	movl	16(%esp),%ebp
 	movl	4(%esp),%edi
 	leal	32(%esp),%esi
@@ -433,13 +440,13 @@ bn_mul_mont:
 	movl	%ebx,%ecx
 	xorl	%edx,%edx
 .align	16
-.L016sub:
+.L017sub:
 	sbbl	(%ebp,%edx,4),%eax
 	movl	%eax,(%edi,%edx,4)
 	decl	%ecx
 	movl	4(%esi,%edx,4),%eax
 	leal	1(%edx),%edx
-	jge	.L016sub
+	jge	.L017sub
 	sbbl	$0,%eax
 	andl	%eax,%esi
 	notl	%eax
@@ -447,12 +454,12 @@ bn_mul_mont:
 	andl	%eax,%ebp
 	orl	%ebp,%esi
 .align	16
-.L017copy:
+.L018copy:
 	movl	(%esi,%ebx,4),%eax
 	movl	%eax,(%edi,%ebx,4)
 	movl	%ecx,32(%esp,%ebx,4)
 	decl	%ebx
-	jge	.L017copy
+	jge	.L018copy
 	movl	24(%esp),%esp
 	movl	$1,%eax
 .L000just_leave:
@@ -486,44 +493,51 @@ bn_mul_mont:
 	jl	.L000just_leave
 	leal	20(%esp),%esi
 	leal	24(%esp),%edx
-	movl	%esp,%ebp
 	addl	$2,%edi
 	negl	%edi
-	leal	-32(%esp,%edi,4),%esp
+	leal	-32(%esp,%edi,4),%ebp
 	negl	%edi
-	movl	%esp,%eax
+	movl	%ebp,%eax
 	subl	%edx,%eax
 	andl	$2047,%eax
-	subl	%eax,%esp
-	xorl	%esp,%edx
+	subl	%eax,%ebp
+	xorl	%ebp,%edx
 	andl	$2048,%edx
 	xorl	$2048,%edx
-	subl	%edx,%esp
-	andl	$-64,%esp
-	movl	%ebp,%eax
-	subl	%esp,%eax
+	subl	%edx,%ebp
+	andl	$-64,%ebp
+	movl	%esp,%eax
+	subl	%ebp,%eax
 	andl	$-4096,%eax
+	movl	%esp,%edx
+	leal	(%ebp,%eax,1),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+	jmp	.L002page_walk_done
+.align	16
 .L001page_walk:
-	movl	(%esp,%eax,1),%edx
-	subl	$4096,%eax
-.byte	46
-	jnc	.L001page_walk
+	leal	-4096(%esp),%esp
+	movl	(%esp),%eax
+	cmpl	%ebp,%esp
+	ja	.L001page_walk
+.L002page_walk_done:
 	movl	(%esi),%eax
 	movl	4(%esi),%ebx
 	movl	8(%esi),%ecx
-	movl	12(%esi),%edx
+	movl	12(%esi),%ebp
 	movl	16(%esi),%esi
 	movl	(%esi),%esi
 	movl	%eax,4(%esp)
 	movl	%ebx,8(%esp)
 	movl	%ecx,12(%esp)
-	movl	%edx,16(%esp)
+	movl	%ebp,16(%esp)
 	movl	%esi,20(%esp)
 	leal	-3(%edi),%ebx
-	movl	%ebp,24(%esp)
+	movl	%edx,24(%esp)
 	leal	OPENSSL_ia32cap_P,%eax
 	btl	$26,(%eax)
-	jnc	.L002non_sse2
+	jnc	.L003non_sse2
 	movl	$-1,%eax
 	movd	%eax,%mm7
 	movl	8(%esp),%esi
@@ -547,7 +561,7 @@ bn_mul_mont:
 	psrlq	$32,%mm3
 	incl	%ecx
 .align	16
-.L0031st:
+.L0041st:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -562,7 +576,7 @@ bn_mul_mont:
 	psrlq	$32,%mm3
 	leal	1(%ecx),%ecx
 	cmpl	%ebx,%ecx
-	jl	.L0031st
+	jl	.L0041st
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -576,7 +590,7 @@ bn_mul_mont:
 	paddq	%mm2,%mm3
 	movq	%mm3,32(%esp,%ebx,4)
 	incl	%edx
-.L004outer:
+.L005outer:
 	xorl	%ecx,%ecx
 	movd	(%edi,%edx,4),%mm4
 	movd	(%esi),%mm5
@@ -598,7 +612,7 @@ bn_mul_mont:
 	paddq	%mm6,%mm2
 	incl	%ecx
 	decl	%ebx
-.L005inner:
+.L006inner:
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
 	paddq	%mm0,%mm2
@@ -615,7 +629,7 @@ bn_mul_mont:
 	paddq	%mm6,%mm2
 	decl	%ebx
 	leal	1(%ecx),%ecx
-	jnz	.L005inner
+	jnz	.L006inner
 	movl	%ecx,%ebx
 	pmuludq	%mm4,%mm0
 	pmuludq	%mm5,%mm1
@@ -633,11 +647,11 @@ bn_mul_mont:
 	movq	%mm3,32(%esp,%ebx,4)
 	leal	1(%edx),%edx
 	cmpl	%ebx,%edx
-	jle	.L004outer
+	jle	.L005outer
 	emms
-	jmp	.L006common_tail
+	jmp	.L007common_tail
 .align	16
-.L002non_sse2:
+.L003non_sse2:
 	movl	8(%esp),%esi
 	leal	1(%ebx),%ebp
 	movl	12(%esp),%edi
@@ -648,12 +662,12 @@ bn_mul_mont:
 	leal	4(%edi,%ebx,4),%eax
 	orl	%edx,%ebp
 	movl	(%edi),%edi
-	jz	.L007bn_sqr_mont
+	jz	.L008bn_sqr_mont
 	movl	%eax,28(%esp)
 	movl	(%esi),%eax
 	xorl	%edx,%edx
 .align	16
-.L008mull:
+.L009mull:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	%eax,%ebp
@@ -662,7 +676,7 @@ bn_mul_mont:
 	movl	(%esi,%ecx,4),%eax
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L008mull
+	jl	.L009mull
 	movl	%edx,%ebp
 	mull	%edi
 	movl	20(%esp),%edi
@@ -680,9 +694,9 @@ bn_mul_mont:
 	movl	4(%esi),%eax
 	adcl	$0,%edx
 	incl	%ecx
-	jmp	.L0092ndmadd
+	jmp	.L0102ndmadd
 .align	16
-.L0101stmadd:
+.L0111stmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -693,7 +707,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L0101stmadd
+	jl	.L0111stmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%eax
@@ -716,7 +730,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	movl	$1,%ecx
 .align	16
-.L0092ndmadd:
+.L0102ndmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -727,7 +741,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	.L0092ndmadd
+	jl	.L0102ndmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -743,16 +757,16 @@ bn_mul_mont:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	28(%esp),%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	.L006common_tail
+	je	.L007common_tail
 	movl	(%ecx),%edi
 	movl	8(%esp),%esi
 	movl	%ecx,12(%esp)
 	xorl	%ecx,%ecx
 	xorl	%edx,%edx
 	movl	(%esi),%eax
-	jmp	.L0101stmadd
+	jmp	.L0111stmadd
 .align	16
-.L007bn_sqr_mont:
+.L008bn_sqr_mont:
 	movl	%ebx,(%esp)
 	movl	%ecx,12(%esp)
 	movl	%edi,%eax
@@ -763,7 +777,7 @@ bn_mul_mont:
 	andl	$1,%ebx
 	incl	%ecx
 .align	16
-.L011sqr:
+.L012sqr:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -775,7 +789,7 @@ bn_mul_mont:
 	cmpl	(%esp),%ecx
 	movl	%eax,%ebx
 	movl	%ebp,28(%esp,%ecx,4)
-	jl	.L011sqr
+	jl	.L012sqr
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -799,7 +813,7 @@ bn_mul_mont:
 	movl	4(%esi),%eax
 	movl	$1,%ecx
 .align	16
-.L0123rdmadd:
+.L0133rdmadd:
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ecx,4),%ebp
@@ -818,7 +832,7 @@ bn_mul_mont:
 	adcl	$0,%edx
 	cmpl	%ebx,%ecx
 	movl	%ebp,24(%esp,%ecx,4)
-	jl	.L0123rdmadd
+	jl	.L0133rdmadd
 	movl	%edx,%ebp
 	mull	%edi
 	addl	32(%esp,%ebx,4),%ebp
@@ -834,7 +848,7 @@ bn_mul_mont:
 	movl	%edx,32(%esp,%ebx,4)
 	cmpl	%ebx,%ecx
 	movl	%eax,36(%esp,%ebx,4)
-	je	.L006common_tail
+	je	.L007common_tail
 	movl	4(%esi,%ecx,4),%edi
 	leal	1(%ecx),%ecx
 	movl	%edi,%eax
@@ -846,12 +860,12 @@ bn_mul_mont:
 	xorl	%ebp,%ebp
 	cmpl	%ebx,%ecx
 	leal	1(%ecx),%ecx
-	je	.L013sqrlast
+	je	.L014sqrlast
 	movl	%edx,%ebx
 	shrl	$1,%edx
 	andl	$1,%ebx
 .align	16
-.L014sqradd:
+.L015sqradd:
 	movl	(%esi,%ecx,4),%eax
 	movl	%edx,%ebp
 	mull	%edi
@@ -867,13 +881,13 @@ bn_mul_mont:
 	cmpl	(%esp),%ecx
 	movl	%ebp,28(%esp,%ecx,4)
 	movl	%eax,%ebx
-	jle	.L014sqradd
+	jle	.L015sqradd
 	movl	%edx,%ebp
 	addl	%edx,%edx
 	shrl	$31,%ebp
 	addl	%ebx,%edx
 	adcl	$0,%ebp
-.L013sqrlast:
+.L014sqrlast:
 	movl	20(%esp),%edi
 	movl	16(%esp),%esi
 	imull	32(%esp),%edi
@@ -888,9 +902,9 @@ bn_mul_mont:
 	adcl	$0,%edx
 	movl	$1,%ecx
 	movl	4(%esi),%eax
-	jmp	.L0123rdmadd
+	jmp	.L0133rdmadd
 .align	16
-.L006common_tail:
+.L007common_tail:
 	movl	16(%esp),%ebp
 	movl	4(%esp),%edi
 	leal	32(%esp),%esi
@@ -898,13 +912,13 @@ bn_mul_mont:
 	movl	%ebx,%ecx
 	xorl	%edx,%edx
 .align	16
-.L015sub:
+.L016sub:
 	sbbl	(%ebp,%edx,4),%eax
 	movl	%eax,(%edi,%edx,4)
 	decl	%ecx
 	movl	4(%esi,%edx,4),%eax
 	leal	1(%edx),%edx
-	jge	.L015sub
+	jge	.L016sub
 	sbbl	$0,%eax
 	andl	%eax,%esi
 	notl	%eax
@@ -912,12 +926,12 @@ bn_mul_mont:
 	andl	%eax,%ebp
 	orl	%ebp,%esi
 .align	16
-.L016copy:
+.L017copy:
 	movl	(%esi,%ebx,4),%eax
 	movl	%eax,(%edi,%ebx,4)
 	movl	%ecx,32(%esp,%ebx,4)
 	decl	%ebx
-	jge	.L016copy
+	jge	.L017copy
 	movl	24(%esp),%esp
 	movl	$1,%eax
 .L000just_leave:
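The functional change, applied identically to both the PIC and non-PIC copies of bn_mul_mont, is in the stack-probing prologue: the new frame bottom is now computed in %ebp while %esp is left untouched, the caller's stack pointer is parked in %edx (and later stored at 24(%esp) for the final restore), and the old index-based probe is replaced by a .L001page_walk loop that lowers %esp one 4096-byte page at a time, loading a word at each step, until it reaches %ebp; the rest of the diff is just the renumbering of the local .Lnnn labels that the two new labels push down. A minimal C sketch of the probing idea follows, assuming 4096-byte pages; page_walk and its parameters are illustrative names, not part of the patch:

#define PAGE_SIZE 4096	/* assumed page size, matching the $-4096 above */

/*
 * Touch one word per page, walking down from the old stack pointer
 * to the new frame bottom, mirroring the new .L001page_walk loop.
 */
static void page_walk(volatile char *top, volatile char *bottom)
{
	volatile char *sp = top;

	(void)*sp;			/* first probe: movl (%esp),%eax */
	while (sp > bottom) {		/* cmpl %ebp,%esp; ja .L001page_walk */
		sp -= PAGE_SIZE;	/* leal -4096(%esp),%esp */
		(void)*sp;		/* movl (%esp),%eax */
	}
}

Probing every intervening page in order, rather than dropping %esp straight to the frame bottom, appears intended to keep a large Montgomery frame from stepping over the stack guard page.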