diff options
Diffstat (limited to 'secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S')
-rw-r--r-- | secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S | 189 |
1 files changed, 94 insertions, 95 deletions
diff --git a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S index fcaa9c1..7cd1db0 100644 --- a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S +++ b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S @@ -29,6 +29,7 @@ ecp_nistz256_mul_by_2: pushq %r13 movq 0(%rsi),%r8 + xorq %r13,%r13 movq 8(%rsi),%r9 addq %r8,%r8 movq 16(%rsi),%r10 @@ -39,7 +40,7 @@ ecp_nistz256_mul_by_2: adcq %r10,%r10 adcq %r11,%r11 movq %r9,%rdx - sbbq %r13,%r13 + adcq $0,%r13 subq 0(%rsi),%r8 movq %r10,%rcx @@ -47,14 +48,14 @@ ecp_nistz256_mul_by_2: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -151,12 +152,12 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 - cmovzq %rcx,%r10 - cmovzq %r12,%r11 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 + cmovcq %rcx,%r10 + cmovcq %r12,%r11 xorq %r13,%r13 addq 0(%rsi),%r8 @@ -173,14 +174,14 @@ ecp_nistz256_mul_by_3: sbbq $0,%r10 movq %r11,%r12 sbbq .Lpoly+24(%rip),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -219,14 +220,14 @@ ecp_nistz256_add: sbbq 16(%rsi),%r10 movq %r11,%r12 sbbq 24(%rsi),%r11 - testq %r13,%r13 + sbbq $0,%r13 - cmovzq %rax,%r8 - cmovzq %rdx,%r9 + cmovcq %rax,%r8 + cmovcq %rdx,%r9 movq %r8,0(%rdi) - cmovzq %rcx,%r10 + cmovcq %rcx,%r10 movq %r9,8(%rdi) - cmovzq %r12,%r11 + cmovcq %r12,%r11 movq %r10,16(%rdi) movq %r11,24(%rdi) @@ -1463,13 +1464,14 @@ ecp_nistz256_avx2_select_w7: .type __ecp_nistz256_add_toq,@function .align 32 __ecp_nistz256_add_toq: + xorq %r11,%r11 addq 0(%rbx),%r12 adcq 8(%rbx),%r13 movq %r12,%rax adcq 16(%rbx),%r8 adcq 24(%rbx),%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1477,14 +1479,14 @@ __ecp_nistz256_add_toq: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1552,13 +1554,14 @@ __ecp_nistz256_subq: .type __ecp_nistz256_mul_by_2q,@function .align 32 __ecp_nistz256_mul_by_2q: + xorq %r11,%r11 addq %r12,%r12 adcq %r13,%r13 movq %r12,%rax adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -1566,14 +1569,14 @@ __ecp_nistz256_mul_by_2q: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 - cmovzq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -1811,16 +1814,14 @@ ecp_nistz256_point_add: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -1832,14 +1833,14 @@ ecp_nistz256_point_add: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-0(%rsi),%rsi movq %rax,544+0(%rsp) @@ -1850,8 +1851,8 @@ ecp_nistz256_point_add: call __ecp_nistz256_sqr_montq pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -2034,6 +2035,7 @@ ecp_nistz256_point_add: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -2041,7 +2043,7 @@ ecp_nistz256_point_add: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2049,15 +2051,15 @@ ecp_nistz256_point_add: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2215,16 +2217,14 @@ ecp_nistz256_point_add_affine: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -2342,6 +2342,7 @@ ecp_nistz256_point_add_affine: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -2349,7 +2350,7 @@ ecp_nistz256_point_add_affine: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -2357,15 +2358,15 @@ ecp_nistz256_point_add_affine: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subq @@ -2512,14 +2513,14 @@ __ecp_nistz256_add_tox: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2607,14 +2608,14 @@ __ecp_nistz256_mul_by_2x: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 + sbbq $0,%r11 - btq $0,%r11 - cmovncq %rax,%r12 - cmovncq %rbp,%r13 + cmovcq %rax,%r12 + cmovcq %rbp,%r13 movq %r12,0(%rdi) - cmovncq %rcx,%r8 + cmovcq %rcx,%r8 movq %r13,8(%rdi) - cmovncq %r10,%r9 + cmovcq %r10,%r9 movq %r8,16(%rdi) movq %r9,24(%rdi) @@ -2844,16 +2845,14 @@ ecp_nistz256_point_addx: movq %rdx,%rsi movdqa %xmm0,384(%rsp) movdqa %xmm1,384+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,416(%rsp) movdqa %xmm3,416+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,448(%rsp) movdqa %xmm5,448+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rsi),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rsi),%xmm1 movdqu 32(%rsi),%xmm2 por %xmm3,%xmm5 @@ -2865,14 +2864,14 @@ ecp_nistz256_point_addx: movdqa %xmm0,480(%rsp) pshufd $0x1e,%xmm5,%xmm4 movdqa %xmm1,480+16(%rsp) - por %xmm0,%xmm1 -.byte 102,72,15,110,199 + movdqu 64(%rsi),%xmm0 + movdqu 80(%rsi),%xmm1 movdqa %xmm2,512(%rsp) movdqa %xmm3,512+16(%rsp) - por %xmm2,%xmm3 por %xmm4,%xmm5 pxor %xmm4,%xmm4 - por %xmm1,%xmm3 + por %xmm0,%xmm1 +.byte 102,72,15,110,199 leaq 64-128(%rsi),%rsi movq %rdx,544+0(%rsp) @@ -2883,8 +2882,8 @@ ecp_nistz256_point_addx: call __ecp_nistz256_sqr_montx pcmpeqd %xmm4,%xmm5 - pshufd $0xb1,%xmm3,%xmm4 - por %xmm3,%xmm4 + pshufd $0xb1,%xmm1,%xmm4 + por %xmm1,%xmm4 pshufd $0,%xmm5,%xmm5 pshufd $0x1e,%xmm4,%xmm3 por %xmm3,%xmm4 @@ -3067,6 +3066,7 @@ ecp_nistz256_point_addx: + xorq %r11,%r11 addq %r12,%r12 leaq 96(%rsp),%rsi adcq %r13,%r13 @@ -3074,7 +3074,7 @@ ecp_nistz256_point_addx: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3082,15 +3082,15 @@ ecp_nistz256_point_addx: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx @@ -3244,16 +3244,14 @@ ecp_nistz256_point_add_affinex: movq 64+24(%rsi),%r8 movdqa %xmm0,320(%rsp) movdqa %xmm1,320+16(%rsp) - por %xmm0,%xmm1 movdqa %xmm2,352(%rsp) movdqa %xmm3,352+16(%rsp) - por %xmm2,%xmm3 movdqa %xmm4,384(%rsp) movdqa %xmm5,384+16(%rsp) - por %xmm1,%xmm3 + por %xmm4,%xmm5 movdqu 0(%rbx),%xmm0 - pshufd $0xb1,%xmm3,%xmm5 + pshufd $0xb1,%xmm5,%xmm3 movdqu 16(%rbx),%xmm1 movdqu 32(%rbx),%xmm2 por %xmm3,%xmm5 @@ -3371,6 +3369,7 @@ ecp_nistz256_point_add_affinex: + xorq %r11,%r11 addq %r12,%r12 leaq 192(%rsp),%rsi adcq %r13,%r13 @@ -3378,7 +3377,7 @@ ecp_nistz256_point_add_affinex: adcq %r8,%r8 adcq %r9,%r9 movq %r13,%rbp - sbbq %r11,%r11 + adcq $0,%r11 subq $-1,%r12 movq %r8,%rcx @@ -3386,15 +3385,15 @@ ecp_nistz256_point_add_affinex: sbbq $0,%r8 movq %r9,%r10 sbbq %r15,%r9 - testq %r11,%r11 + sbbq $0,%r11 - cmovzq %rax,%r12 + cmovcq %rax,%r12 movq 0(%rsi),%rax - cmovzq %rbp,%r13 + cmovcq %rbp,%r13 movq 8(%rsi),%rbp - cmovzq %rcx,%r8 + cmovcq %rcx,%r8 movq 16(%rsi),%rcx - cmovzq %r10,%r9 + cmovcq %r10,%r9 movq 24(%rsi),%r10 call __ecp_nistz256_subx |