summaryrefslogtreecommitdiffstats
path: root/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S')
-rw-r--r--secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S1514
1 files changed, 1512 insertions, 2 deletions
diff --git a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
index 5d7d5e2..67c214e 100644
--- a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
+++ b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
@@ -1,4 +1,5 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from ecp_nistz256-x86_64.pl.
.text
@@ -333,6 +334,8 @@ ecp_nistz256_neg:
.type ecp_nistz256_to_mont,@function
.align 32
ecp_nistz256_to_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
leaq .LRR(%rip),%rdx
jmp .Lmul_mont
.size ecp_nistz256_to_mont,.-ecp_nistz256_to_mont
@@ -347,6 +350,8 @@ ecp_nistz256_to_mont:
.type ecp_nistz256_mul_mont,@function
.align 32
ecp_nistz256_mul_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
.Lmul_mont:
pushq %rbp
pushq %rbx
@@ -354,6 +359,8 @@ ecp_nistz256_mul_mont:
pushq %r13
pushq %r14
pushq %r15
+ cmpl $0x80100,%ecx
+ je .Lmul_montx
movq %rdx,%rbx
movq 0(%rdx),%rax
movq 0(%rsi),%r9
@@ -362,6 +369,19 @@ ecp_nistz256_mul_mont:
movq 24(%rsi),%r12
call __ecp_nistz256_mul_montq
+ jmp .Lmul_mont_done
+
+.align 32
+.Lmul_montx:
+ movq %rdx,%rbx
+ movq 0(%rdx),%rdx
+ movq 0(%rsi),%r9
+ movq 8(%rsi),%r10
+ movq 16(%rsi),%r11
+ movq 24(%rsi),%r12
+ leaq -128(%rsi),%rsi
+
+ call __ecp_nistz256_mul_montx
.Lmul_mont_done:
popq %r15
popq %r14
@@ -599,18 +619,33 @@ __ecp_nistz256_mul_montq:
.type ecp_nistz256_sqr_mont,@function
.align 32
ecp_nistz256_sqr_mont:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
+ cmpl $0x80100,%ecx
+ je .Lsqr_montx
movq 0(%rsi),%rax
movq 8(%rsi),%r14
movq 16(%rsi),%r15
movq 24(%rsi),%r8
call __ecp_nistz256_sqr_montq
+ jmp .Lsqr_mont_done
+
+.align 32
+.Lsqr_montx:
+ movq 0(%rsi),%rdx
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r15
+ movq 24(%rsi),%r8
+ leaq -128(%rsi),%rsi
+
+ call __ecp_nistz256_sqr_montx
.Lsqr_mont_done:
popq %r15
popq %r14
@@ -782,6 +817,304 @@ __ecp_nistz256_sqr_montq:
.byte 0xf3,0xc3
.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq
+.type __ecp_nistz256_mul_montx,@function
+.align 32
+__ecp_nistz256_mul_montx:
+
+
+ mulxq %r9,%r8,%r9
+ mulxq %r10,%rcx,%r10
+ movq $32,%r14
+ xorq %r13,%r13
+ mulxq %r11,%rbp,%r11
+ movq .Lpoly+24(%rip),%r15
+ adcq %rcx,%r9
+ mulxq %r12,%rcx,%r12
+ movq %r8,%rdx
+ adcq %rbp,%r10
+ shlxq %r14,%r8,%rbp
+ adcq %rcx,%r11
+ shrxq %r14,%r8,%rcx
+ adcq $0,%r12
+
+
+
+ addq %rbp,%r9
+ adcq %rcx,%r10
+
+ mulxq %r15,%rcx,%rbp
+ movq 8(%rbx),%rdx
+ adcq %rcx,%r11
+ adcq %rbp,%r12
+ adcq $0,%r13
+ xorq %r8,%r8
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r9
+ adoxq %rbp,%r10
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r9,%rdx
+ adcxq %rcx,%r12
+ shlxq %r14,%r9,%rcx
+ adoxq %rbp,%r13
+ shrxq %r14,%r9,%rbp
+
+ adcxq %r8,%r13
+ adoxq %r8,%r8
+ adcq $0,%r8
+
+
+
+ addq %rcx,%r10
+ adcq %rbp,%r11
+
+ mulxq %r15,%rcx,%rbp
+ movq 16(%rbx),%rdx
+ adcq %rcx,%r12
+ adcq %rbp,%r13
+ adcq $0,%r8
+ xorq %r9,%r9
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r10
+ adoxq %rbp,%r11
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r10,%rdx
+ adcxq %rcx,%r13
+ shlxq %r14,%r10,%rcx
+ adoxq %rbp,%r8
+ shrxq %r14,%r10,%rbp
+
+ adcxq %r9,%r8
+ adoxq %r9,%r9
+ adcq $0,%r9
+
+
+
+ addq %rcx,%r11
+ adcq %rbp,%r12
+
+ mulxq %r15,%rcx,%rbp
+ movq 24(%rbx),%rdx
+ adcq %rcx,%r13
+ adcq %rbp,%r8
+ adcq $0,%r9
+ xorq %r10,%r10
+
+
+
+ mulxq 0+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq 8+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+
+ mulxq 16+128(%rsi),%rcx,%rbp
+ adcxq %rcx,%r13
+ adoxq %rbp,%r8
+
+ mulxq 24+128(%rsi),%rcx,%rbp
+ movq %r11,%rdx
+ adcxq %rcx,%r8
+ shlxq %r14,%r11,%rcx
+ adoxq %rbp,%r9
+ shrxq %r14,%r11,%rbp
+
+ adcxq %r10,%r9
+ adoxq %r10,%r10
+ adcq $0,%r10
+
+
+
+ addq %rcx,%r12
+ adcq %rbp,%r13
+
+ mulxq %r15,%rcx,%rbp
+ movq %r12,%rbx
+ movq .Lpoly+8(%rip),%r14
+ adcq %rcx,%r8
+ movq %r13,%rdx
+ adcq %rbp,%r9
+ adcq $0,%r10
+
+
+
+ xorl %eax,%eax
+ movq %r8,%rcx
+ sbbq $-1,%r12
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%rbp
+ sbbq %r15,%r9
+ sbbq $0,%r10
+
+ cmovcq %rbx,%r12
+ cmovcq %rdx,%r13
+ movq %r12,0(%rdi)
+ cmovcq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovcq %rbp,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx
+
+.type __ecp_nistz256_sqr_montx,@function
+.align 32
+__ecp_nistz256_sqr_montx:
+ mulxq %r14,%r9,%r10
+ mulxq %r15,%rcx,%r11
+ xorl %eax,%eax
+ adcq %rcx,%r10
+ mulxq %r8,%rbp,%r12
+ movq %r14,%rdx
+ adcq %rbp,%r11
+ adcq $0,%r12
+ xorq %r13,%r13
+
+
+ mulxq %r15,%rcx,%rbp
+ adcxq %rcx,%r11
+ adoxq %rbp,%r12
+
+ mulxq %r8,%rcx,%rbp
+ movq %r15,%rdx
+ adcxq %rcx,%r12
+ adoxq %rbp,%r13
+ adcq $0,%r13
+
+
+ mulxq %r8,%rcx,%r14
+ movq 0+128(%rsi),%rdx
+ xorq %r15,%r15
+ adcxq %r9,%r9
+ adoxq %rcx,%r13
+ adcxq %r10,%r10
+ adoxq %r15,%r14
+
+ mulxq %rdx,%r8,%rbp
+ movq 8+128(%rsi),%rdx
+ adcxq %r11,%r11
+ adoxq %rbp,%r9
+ adcxq %r12,%r12
+ mulxq %rdx,%rcx,%rax
+ movq 16+128(%rsi),%rdx
+ adcxq %r13,%r13
+ adoxq %rcx,%r10
+ adcxq %r14,%r14
+.byte 0x67
+ mulxq %rdx,%rcx,%rbp
+ movq 24+128(%rsi),%rdx
+ adoxq %rax,%r11
+ adcxq %r15,%r15
+ adoxq %rcx,%r12
+ movq $32,%rsi
+ adoxq %rbp,%r13
+.byte 0x67,0x67
+ mulxq %rdx,%rcx,%rax
+ movq %r8,%rdx
+ adoxq %rcx,%r14
+ shlxq %rsi,%r8,%rcx
+ adoxq %rax,%r15
+ shrxq %rsi,%r8,%rax
+ movq .Lpoly+24(%rip),%rbp
+
+
+ addq %rcx,%r9
+ adcq %rax,%r10
+
+ mulxq %rbp,%rcx,%r8
+ movq %r9,%rdx
+ adcq %rcx,%r11
+ shlxq %rsi,%r9,%rcx
+ adcq $0,%r8
+ shrxq %rsi,%r9,%rax
+
+
+ addq %rcx,%r10
+ adcq %rax,%r11
+
+ mulxq %rbp,%rcx,%r9
+ movq %r10,%rdx
+ adcq %rcx,%r8
+ shlxq %rsi,%r10,%rcx
+ adcq $0,%r9
+ shrxq %rsi,%r10,%rax
+
+
+ addq %rcx,%r11
+ adcq %rax,%r8
+
+ mulxq %rbp,%rcx,%r10
+ movq %r11,%rdx
+ adcq %rcx,%r9
+ shlxq %rsi,%r11,%rcx
+ adcq $0,%r10
+ shrxq %rsi,%r11,%rax
+
+
+ addq %rcx,%r8
+ adcq %rax,%r9
+
+ mulxq %rbp,%rcx,%r11
+ adcq %rcx,%r10
+ adcq $0,%r11
+
+ xorq %rdx,%rdx
+ adcq %r8,%r12
+ movq .Lpoly+8(%rip),%rsi
+ adcq %r9,%r13
+ movq %r12,%r8
+ adcq %r10,%r14
+ adcq %r11,%r15
+ movq %r13,%r9
+ adcq $0,%rdx
+
+ xorl %eax,%eax
+ sbbq $-1,%r12
+ movq %r14,%r10
+ sbbq %rsi,%r13
+ sbbq $0,%r14
+ movq %r15,%r11
+ sbbq %rbp,%r15
+ sbbq $0,%rdx
+
+ cmovcq %r8,%r12
+ cmovcq %r9,%r13
+ movq %r12,0(%rdi)
+ cmovcq %r10,%r14
+ movq %r13,8(%rdi)
+ cmovcq %r11,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx
@@ -884,6 +1217,9 @@ ecp_nistz256_from_mont:
.type ecp_nistz256_select_w5,@function
.align 32
ecp_nistz256_select_w5:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ testl $32,%eax
+ jnz .Lavx2_select_w5
movdqa .LOne(%rip),%xmm0
movd %edx,%xmm1
@@ -943,6 +1279,9 @@ ecp_nistz256_select_w5:
.type ecp_nistz256_select_w7,@function
.align 32
ecp_nistz256_select_w7:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
+ testl $32,%eax
+ jnz .Lavx2_select_w7
movdqa .LOne(%rip),%xmm8
movd %edx,%xmm1
@@ -984,11 +1323,141 @@ ecp_nistz256_select_w7:
movdqu %xmm5,48(%rdi)
.byte 0xf3,0xc3
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
+
+
+.type ecp_nistz256_avx2_select_w5,@function
+.align 32
+ecp_nistz256_avx2_select_w5:
+.Lavx2_select_w5:
+ vzeroupper
+ vmovdqa .LTwo(%rip),%ymm0
+
+ vpxor %ymm2,%ymm2,%ymm2
+ vpxor %ymm3,%ymm3,%ymm3
+ vpxor %ymm4,%ymm4,%ymm4
+
+ vmovdqa .LOne(%rip),%ymm5
+ vmovdqa .LTwo(%rip),%ymm10
+
+ vmovd %edx,%xmm1
+ vpermd %ymm1,%ymm2,%ymm1
+
+ movq $8,%rax
+.Lselect_loop_avx2_w5:
+
+ vmovdqa 0(%rsi),%ymm6
+ vmovdqa 32(%rsi),%ymm7
+ vmovdqa 64(%rsi),%ymm8
+
+ vmovdqa 96(%rsi),%ymm11
+ vmovdqa 128(%rsi),%ymm12
+ vmovdqa 160(%rsi),%ymm13
+
+ vpcmpeqd %ymm1,%ymm5,%ymm9
+ vpcmpeqd %ymm1,%ymm10,%ymm14
+
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpaddd %ymm0,%ymm10,%ymm10
+ leaq 192(%rsi),%rsi
+
+ vpand %ymm9,%ymm6,%ymm6
+ vpand %ymm9,%ymm7,%ymm7
+ vpand %ymm9,%ymm8,%ymm8
+ vpand %ymm14,%ymm11,%ymm11
+ vpand %ymm14,%ymm12,%ymm12
+ vpand %ymm14,%ymm13,%ymm13
+
+ vpxor %ymm6,%ymm2,%ymm2
+ vpxor %ymm7,%ymm3,%ymm3
+ vpxor %ymm8,%ymm4,%ymm4
+ vpxor %ymm11,%ymm2,%ymm2
+ vpxor %ymm12,%ymm3,%ymm3
+ vpxor %ymm13,%ymm4,%ymm4
+
+ decq %rax
+ jnz .Lselect_loop_avx2_w5
+
+ vmovdqu %ymm2,0(%rdi)
+ vmovdqu %ymm3,32(%rdi)
+ vmovdqu %ymm4,64(%rdi)
+ vzeroupper
+ .byte 0xf3,0xc3
+.size ecp_nistz256_avx2_select_w5,.-ecp_nistz256_avx2_select_w5
+
+
+
.globl ecp_nistz256_avx2_select_w7
.type ecp_nistz256_avx2_select_w7,@function
.align 32
ecp_nistz256_avx2_select_w7:
-.byte 0x0f,0x0b
+.Lavx2_select_w7:
+ vzeroupper
+ vmovdqa .LThree(%rip),%ymm0
+
+ vpxor %ymm2,%ymm2,%ymm2
+ vpxor %ymm3,%ymm3,%ymm3
+
+ vmovdqa .LOne(%rip),%ymm4
+ vmovdqa .LTwo(%rip),%ymm8
+ vmovdqa .LThree(%rip),%ymm12
+
+ vmovd %edx,%xmm1
+ vpermd %ymm1,%ymm2,%ymm1
+
+
+ movq $21,%rax
+.Lselect_loop_avx2_w7:
+
+ vmovdqa 0(%rsi),%ymm5
+ vmovdqa 32(%rsi),%ymm6
+
+ vmovdqa 64(%rsi),%ymm9
+ vmovdqa 96(%rsi),%ymm10
+
+ vmovdqa 128(%rsi),%ymm13
+ vmovdqa 160(%rsi),%ymm14
+
+ vpcmpeqd %ymm1,%ymm4,%ymm7
+ vpcmpeqd %ymm1,%ymm8,%ymm11
+ vpcmpeqd %ymm1,%ymm12,%ymm15
+
+ vpaddd %ymm0,%ymm4,%ymm4
+ vpaddd %ymm0,%ymm8,%ymm8
+ vpaddd %ymm0,%ymm12,%ymm12
+ leaq 192(%rsi),%rsi
+
+ vpand %ymm7,%ymm5,%ymm5
+ vpand %ymm7,%ymm6,%ymm6
+ vpand %ymm11,%ymm9,%ymm9
+ vpand %ymm11,%ymm10,%ymm10
+ vpand %ymm15,%ymm13,%ymm13
+ vpand %ymm15,%ymm14,%ymm14
+
+ vpxor %ymm5,%ymm2,%ymm2
+ vpxor %ymm6,%ymm3,%ymm3
+ vpxor %ymm9,%ymm2,%ymm2
+ vpxor %ymm10,%ymm3,%ymm3
+ vpxor %ymm13,%ymm2,%ymm2
+ vpxor %ymm14,%ymm3,%ymm3
+
+ decq %rax
+ jnz .Lselect_loop_avx2_w7
+
+
+ vmovdqa 0(%rsi),%ymm5
+ vmovdqa 32(%rsi),%ymm6
+
+ vpcmpeqd %ymm1,%ymm4,%ymm7
+
+ vpand %ymm7,%ymm5,%ymm5
+ vpand %ymm7,%ymm6,%ymm6
+
+ vpxor %ymm5,%ymm2,%ymm2
+ vpxor %ymm6,%ymm3,%ymm3
+
+ vmovdqu %ymm2,0(%rdi)
+ vmovdqu %ymm3,32(%rdi)
+ vzeroupper
.byte 0xf3,0xc3
.size ecp_nistz256_avx2_select_w7,.-ecp_nistz256_avx2_select_w7
.type __ecp_nistz256_add_toq,@function
@@ -1114,6 +1583,10 @@ __ecp_nistz256_mul_by_2q:
.type ecp_nistz256_point_double,@function
.align 32
ecp_nistz256_point_double:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_doublex
pushq %rbp
pushq %rbx
pushq %r12
@@ -1316,6 +1789,10 @@ ecp_nistz256_point_double:
.type ecp_nistz256_point_add,@function
.align 32
ecp_nistz256_point_add:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_addx
pushq %rbp
pushq %rbx
pushq %r12
@@ -1713,6 +2190,10 @@ ecp_nistz256_point_add:
.type ecp_nistz256_point_add_affine,@function
.align 32
ecp_nistz256_point_add_affine:
+ movl $0x80100,%ecx
+ andl OPENSSL_ia32cap_P+8(%rip),%ecx
+ cmpl $0x80100,%ecx
+ je .Lpoint_add_affinex
pushq %rbp
pushq %rbx
pushq %r12
@@ -2012,3 +2493,1032 @@ ecp_nistz256_point_add_affine:
popq %rbp
.byte 0xf3,0xc3
.size ecp_nistz256_point_add_affine,.-ecp_nistz256_point_add_affine
+.type __ecp_nistz256_add_tox,@function
+.align 32
+__ecp_nistz256_add_tox:
+ xorq %r11,%r11
+ adcq 0(%rbx),%r12
+ adcq 8(%rbx),%r13
+ movq %r12,%rax
+ adcq 16(%rbx),%r8
+ adcq 24(%rbx),%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ xorq %r10,%r10
+ sbbq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+
+ btq $0,%r11
+ cmovncq %rax,%r12
+ cmovncq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovncq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovncq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox
+
+.type __ecp_nistz256_sub_fromx,@function
+.align 32
+__ecp_nistz256_sub_fromx:
+ xorq %r11,%r11
+ sbbq 0(%rbx),%r12
+ sbbq 8(%rbx),%r13
+ movq %r12,%rax
+ sbbq 16(%rbx),%r8
+ sbbq 24(%rbx),%r9
+ movq %r13,%rbp
+ sbbq $0,%r11
+
+ xorq %r10,%r10
+ adcq $-1,%r12
+ movq %r8,%rcx
+ adcq %r14,%r13
+ adcq $0,%r8
+ movq %r9,%r10
+ adcq %r15,%r9
+
+ btq $0,%r11
+ cmovncq %rax,%r12
+ cmovncq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovncq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovncq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx
+
+.type __ecp_nistz256_subx,@function
+.align 32
+__ecp_nistz256_subx:
+ xorq %r11,%r11
+ sbbq %r12,%rax
+ sbbq %r13,%rbp
+ movq %rax,%r12
+ sbbq %r8,%rcx
+ sbbq %r9,%r10
+ movq %rbp,%r13
+ sbbq $0,%r11
+
+ xorq %r9,%r9
+ adcq $-1,%rax
+ movq %rcx,%r8
+ adcq %r14,%rbp
+ adcq $0,%rcx
+ movq %r10,%r9
+ adcq %r15,%r10
+
+ btq $0,%r11
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
+ cmovcq %rcx,%r8
+ cmovcq %r10,%r9
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_subx,.-__ecp_nistz256_subx
+
+.type __ecp_nistz256_mul_by_2x,@function
+.align 32
+__ecp_nistz256_mul_by_2x:
+ xorq %r11,%r11
+ adcq %r12,%r12
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ adcq $0,%r11
+
+ xorq %r10,%r10
+ sbbq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+
+ btq $0,%r11
+ cmovncq %rax,%r12
+ cmovncq %rbp,%r13
+ movq %r12,0(%rdi)
+ cmovncq %rcx,%r8
+ movq %r13,8(%rdi)
+ cmovncq %r10,%r9
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+
+ .byte 0xf3,0xc3
+.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x
+.type ecp_nistz256_point_doublex,@function
+.align 32
+ecp_nistz256_point_doublex:
+.Lpoint_doublex:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $160+8,%rsp
+
+.Lpoint_double_shortcutx:
+ movdqu 0(%rsi),%xmm0
+ movq %rsi,%rbx
+ movdqu 16(%rsi),%xmm1
+ movq 32+0(%rsi),%r12
+ movq 32+8(%rsi),%r13
+ movq 32+16(%rsi),%r8
+ movq 32+24(%rsi),%r9
+ movq .Lpoly+8(%rip),%r14
+ movq .Lpoly+24(%rip),%r15
+ movdqa %xmm0,96(%rsp)
+ movdqa %xmm1,96+16(%rsp)
+ leaq 32(%rdi),%r10
+ leaq 64(%rdi),%r11
+.byte 102,72,15,110,199
+.byte 102,73,15,110,202
+.byte 102,73,15,110,211
+
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ leaq 64-128(%rsi),%rsi
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 32(%rbx),%rdx
+ movq 64+0(%rbx),%r9
+ movq 64+8(%rbx),%r10
+ movq 64+16(%rbx),%r11
+ movq 64+24(%rbx),%r12
+ leaq 64-128(%rbx),%rsi
+ leaq 32(%rbx),%rbx
+.byte 102,72,15,126,215
+ call __ecp_nistz256_mul_montx
+ call __ecp_nistz256_mul_by_2x
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_tox
+
+ movq 96+0(%rsp),%r12
+ movq 96+8(%rsp),%r13
+ leaq 64(%rsp),%rbx
+ movq 96+16(%rsp),%r8
+ movq 96+24(%rsp),%r9
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sqr_montx
+ xorq %r9,%r9
+ movq %r12,%rax
+ addq $-1,%r12
+ movq %r13,%r10
+ adcq %rsi,%r13
+ movq %r14,%rcx
+ adcq $0,%r14
+ movq %r15,%r8
+ adcq %rbp,%r15
+ adcq $0,%r9
+ xorq %rsi,%rsi
+ testq $1,%rax
+
+ cmovzq %rax,%r12
+ cmovzq %r10,%r13
+ cmovzq %rcx,%r14
+ cmovzq %r8,%r15
+ cmovzq %rsi,%r9
+
+ movq %r13,%rax
+ shrq $1,%r12
+ shlq $63,%rax
+ movq %r14,%r10
+ shrq $1,%r13
+ orq %rax,%r12
+ shlq $63,%r10
+ movq %r15,%rcx
+ shrq $1,%r14
+ orq %r10,%r13
+ shlq $63,%rcx
+ movq %r12,0(%rdi)
+ shrq $1,%r15
+ movq %r13,8(%rdi)
+ shlq $63,%r9
+ orq %rcx,%r14
+ orq %r9,%r15
+ movq %r14,16(%rdi)
+ movq %r15,24(%rdi)
+ movq 64(%rsp),%rdx
+ leaq 64(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ leaq 32(%rsp),%rbx
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_add_tox
+
+ movq 96(%rsp),%rdx
+ leaq 96(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_by_2x
+
+ movq 0+32(%rsp),%rdx
+ movq 8+32(%rsp),%r14
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r15
+ movq 24+32(%rsp),%r8
+.byte 102,72,15,126,199
+ call __ecp_nistz256_sqr_montx
+
+ leaq 128(%rsp),%rbx
+ movq %r14,%r8
+ movq %r15,%r9
+ movq %rsi,%r14
+ movq %rbp,%r15
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_subx
+
+ movq 32(%rsp),%rdx
+ leaq 32(%rsp),%rbx
+ movq %r12,%r14
+ xorl %ecx,%ecx
+ movq %r12,0+0(%rsp)
+ movq %r13,%r10
+ movq %r13,0+8(%rsp)
+ cmovzq %r8,%r11
+ movq %r8,0+16(%rsp)
+ leaq 0-128(%rsp),%rsi
+ cmovzq %r9,%r12
+ movq %r9,0+24(%rsp)
+ movq %r14,%r9
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+.byte 102,72,15,126,203
+.byte 102,72,15,126,207
+ call __ecp_nistz256_sub_fromx
+
+ addq $160+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_doublex,.-ecp_nistz256_point_doublex
+.type ecp_nistz256_point_addx,@function
+.align 32
+ecp_nistz256_point_addx:
+.Lpoint_addx:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $576+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq %rsi,%rbx
+ movq %rdx,%rsi
+ movdqa %xmm0,384(%rsp)
+ movdqa %xmm1,384+16(%rsp)
+ por %xmm0,%xmm1
+ movdqa %xmm2,416(%rsp)
+ movdqa %xmm3,416+16(%rsp)
+ por %xmm2,%xmm3
+ movdqa %xmm4,448(%rsp)
+ movdqa %xmm5,448+16(%rsp)
+ por %xmm1,%xmm3
+
+ movdqu 0(%rsi),%xmm0
+ pshufd $0xb1,%xmm3,%xmm5
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rsi),%xmm3
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,480(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,480+16(%rsp)
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+ movdqa %xmm2,512(%rsp)
+ movdqa %xmm3,512+16(%rsp)
+ por %xmm2,%xmm3
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm1,%xmm3
+
+ leaq 64-128(%rsi),%rsi
+ movq %rdx,544+0(%rsp)
+ movq %r14,544+8(%rsp)
+ movq %r15,544+16(%rsp)
+ movq %r8,544+24(%rsp)
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm3,%xmm4
+ por %xmm3,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+ movq 64+0(%rbx),%rdx
+ movq 64+8(%rbx),%r14
+ movq 64+16(%rbx),%r15
+ movq 64+24(%rbx),%r8
+.byte 102,72,15,110,203
+
+ leaq 64-128(%rbx),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 544(%rsp),%rdx
+ leaq 544(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 416(%rsp),%rdx
+ leaq 416(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq -128+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 512(%rsp),%rdx
+ leaq 512(%rsp),%rbx
+ movq 0+256(%rsp),%r9
+ movq 8+256(%rsp),%r10
+ leaq -128+256(%rsp),%rsi
+ movq 16+256(%rsp),%r11
+ movq 24+256(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 224(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ orq %r13,%r12
+ movdqa %xmm4,%xmm2
+ orq %r8,%r12
+ orq %r9,%r12
+ por %xmm5,%xmm2
+.byte 102,73,15,110,220
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+96(%rsp),%r9
+ movq 8+96(%rsp),%r10
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r11
+ movq 24+96(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 480(%rsp),%rdx
+ leaq 480(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 160(%rsp),%rbx
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ orq %r13,%r12
+ orq %r8,%r12
+ orq %r9,%r12
+
+.byte 0x3e
+ jnz .Ladd_proceedx
+.byte 102,73,15,126,208
+.byte 102,73,15,126,217
+ testq %r8,%r8
+ jnz .Ladd_proceedx
+ testq %r9,%r9
+ jz .Ladd_doublex
+
+.byte 102,72,15,126,199
+ pxor %xmm0,%xmm0
+ movdqu %xmm0,0(%rdi)
+ movdqu %xmm0,16(%rdi)
+ movdqu %xmm0,32(%rdi)
+ movdqu %xmm0,48(%rdi)
+ movdqu %xmm0,64(%rdi)
+ movdqu %xmm0,80(%rdi)
+ jmp .Ladd_donex
+
+.align 32
+.Ladd_doublex:
+.byte 102,72,15,126,206
+.byte 102,72,15,126,199
+ addq $416,%rsp
+ jmp .Lpoint_double_shortcutx
+
+.align 32
+.Ladd_proceedx:
+ movq 0+64(%rsp),%rdx
+ movq 8+64(%rsp),%r14
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+0(%rsp),%r9
+ movq 8+0(%rsp),%r10
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r11
+ movq 24+0(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 0+0(%rsp),%rdx
+ movq 8+0(%rsp),%r14
+ leaq -128+0(%rsp),%rsi
+ movq 16+0(%rsp),%r15
+ movq 24+0(%rsp),%r8
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 544(%rsp),%rdx
+ leaq 544(%rsp),%rbx
+ movq 0+352(%rsp),%r9
+ movq 8+352(%rsp),%r10
+ leaq -128+352(%rsp),%rsi
+ movq 16+352(%rsp),%r11
+ movq 24+352(%rsp),%r12
+ leaq 352(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 0(%rsp),%rdx
+ leaq 0(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 160(%rsp),%rdx
+ leaq 160(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+
+
+
+ addq %r12,%r12
+ leaq 96(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ sbbq %r11,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ testq %r11,%r11
+
+ cmovzq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovzq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovzq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovzq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subx
+
+ leaq 128(%rsp),%rbx
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 192+0(%rsp),%rax
+ movq 192+8(%rsp),%rbp
+ movq 192+16(%rsp),%rcx
+ movq 192+24(%rsp),%r10
+ leaq 320(%rsp),%rdi
+
+ call __ecp_nistz256_subx
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 128(%rsp),%rdx
+ leaq 128(%rsp),%rbx
+ movq 0+224(%rsp),%r9
+ movq 8+224(%rsp),%r10
+ leaq -128+224(%rsp),%rsi
+ movq 16+224(%rsp),%r11
+ movq 24+224(%rsp),%r12
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 320(%rsp),%rdx
+ leaq 320(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 256(%rsp),%rbx
+ leaq 320(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 352(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 352+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 544(%rsp),%xmm2
+ pand 544+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 480(%rsp),%xmm2
+ pand 480+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 320(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 320+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 512(%rsp),%xmm2
+ pand 512+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+.Ladd_donex:
+ addq $576+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_addx,.-ecp_nistz256_point_addx
+.type ecp_nistz256_point_add_affinex,@function
+.align 32
+ecp_nistz256_point_add_affinex:
+.Lpoint_add_affinex:
+ pushq %rbp
+ pushq %rbx
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ subq $480+8,%rsp
+
+ movdqu 0(%rsi),%xmm0
+ movq %rdx,%rbx
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+ movdqu 64(%rsi),%xmm4
+ movdqu 80(%rsi),%xmm5
+ movq 64+0(%rsi),%rdx
+ movq 64+8(%rsi),%r14
+ movq 64+16(%rsi),%r15
+ movq 64+24(%rsi),%r8
+ movdqa %xmm0,320(%rsp)
+ movdqa %xmm1,320+16(%rsp)
+ por %xmm0,%xmm1
+ movdqa %xmm2,352(%rsp)
+ movdqa %xmm3,352+16(%rsp)
+ por %xmm2,%xmm3
+ movdqa %xmm4,384(%rsp)
+ movdqa %xmm5,384+16(%rsp)
+ por %xmm1,%xmm3
+
+ movdqu 0(%rbx),%xmm0
+ pshufd $0xb1,%xmm3,%xmm5
+ movdqu 16(%rbx),%xmm1
+ movdqu 32(%rbx),%xmm2
+ por %xmm3,%xmm5
+ movdqu 48(%rbx),%xmm3
+ movdqa %xmm0,416(%rsp)
+ pshufd $0x1e,%xmm5,%xmm4
+ movdqa %xmm1,416+16(%rsp)
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
+ movdqa %xmm2,448(%rsp)
+ movdqa %xmm3,448+16(%rsp)
+ por %xmm2,%xmm3
+ por %xmm4,%xmm5
+ pxor %xmm4,%xmm4
+ por %xmm1,%xmm3
+
+ leaq 64-128(%rsi),%rsi
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ pcmpeqd %xmm4,%xmm5
+ pshufd $0xb1,%xmm3,%xmm4
+ movq 0(%rbx),%rdx
+
+ movq %r12,%r9
+ por %xmm3,%xmm4
+ pshufd $0,%xmm5,%xmm5
+ pshufd $0x1e,%xmm4,%xmm3
+ movq %r13,%r10
+ por %xmm3,%xmm4
+ pxor %xmm3,%xmm3
+ movq %r14,%r11
+ pcmpeqd %xmm3,%xmm4
+ pshufd $0,%xmm4,%xmm4
+
+ leaq 32-128(%rsp),%rsi
+ movq %r15,%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 320(%rsp),%rbx
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 384(%rsp),%rdx
+ leaq 384(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 288(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 448(%rsp),%rdx
+ leaq 448(%rsp),%rbx
+ movq 0+32(%rsp),%r9
+ movq 8+32(%rsp),%r10
+ leaq -128+32(%rsp),%rsi
+ movq 16+32(%rsp),%r11
+ movq 24+32(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 352(%rsp),%rbx
+ leaq 96(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+64(%rsp),%rdx
+ movq 8+64(%rsp),%r14
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r15
+ movq 24+64(%rsp),%r8
+ leaq 128(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 0+96(%rsp),%rdx
+ movq 8+96(%rsp),%r14
+ leaq -128+96(%rsp),%rsi
+ movq 16+96(%rsp),%r15
+ movq 24+96(%rsp),%r8
+ leaq 192(%rsp),%rdi
+ call __ecp_nistz256_sqr_montx
+
+ movq 128(%rsp),%rdx
+ leaq 128(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 160(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 320(%rsp),%rdx
+ leaq 320(%rsp),%rbx
+ movq 0+128(%rsp),%r9
+ movq 8+128(%rsp),%r10
+ leaq -128+128(%rsp),%rsi
+ movq 16+128(%rsp),%r11
+ movq 24+128(%rsp),%r12
+ leaq 0(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+
+
+
+ addq %r12,%r12
+ leaq 192(%rsp),%rsi
+ adcq %r13,%r13
+ movq %r12,%rax
+ adcq %r8,%r8
+ adcq %r9,%r9
+ movq %r13,%rbp
+ sbbq %r11,%r11
+
+ subq $-1,%r12
+ movq %r8,%rcx
+ sbbq %r14,%r13
+ sbbq $0,%r8
+ movq %r9,%r10
+ sbbq %r15,%r9
+ testq %r11,%r11
+
+ cmovzq %rax,%r12
+ movq 0(%rsi),%rax
+ cmovzq %rbp,%r13
+ movq 8(%rsi),%rbp
+ cmovzq %rcx,%r8
+ movq 16(%rsi),%rcx
+ cmovzq %r10,%r9
+ movq 24(%rsi),%r10
+
+ call __ecp_nistz256_subx
+
+ leaq 160(%rsp),%rbx
+ leaq 224(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+ movq 0+0(%rsp),%rax
+ movq 0+8(%rsp),%rbp
+ movq 0+16(%rsp),%rcx
+ movq 0+24(%rsp),%r10
+ leaq 64(%rsp),%rdi
+
+ call __ecp_nistz256_subx
+
+ movq %r12,0(%rdi)
+ movq %r13,8(%rdi)
+ movq %r8,16(%rdi)
+ movq %r9,24(%rdi)
+ movq 352(%rsp),%rdx
+ leaq 352(%rsp),%rbx
+ movq 0+160(%rsp),%r9
+ movq 8+160(%rsp),%r10
+ leaq -128+160(%rsp),%rsi
+ movq 16+160(%rsp),%r11
+ movq 24+160(%rsp),%r12
+ leaq 32(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ movq 96(%rsp),%rdx
+ leaq 96(%rsp),%rbx
+ movq 0+64(%rsp),%r9
+ movq 8+64(%rsp),%r10
+ leaq -128+64(%rsp),%rsi
+ movq 16+64(%rsp),%r11
+ movq 24+64(%rsp),%r12
+ leaq 64(%rsp),%rdi
+ call __ecp_nistz256_mul_montx
+
+ leaq 32(%rsp),%rbx
+ leaq 256(%rsp),%rdi
+ call __ecp_nistz256_sub_fromx
+
+.byte 102,72,15,126,199
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 288(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 288+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand .LONE_mont(%rip),%xmm2
+ pand .LONE_mont+16(%rip),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 384(%rsp),%xmm2
+ pand 384+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,64(%rdi)
+ movdqu %xmm3,80(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 224(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 224+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 416(%rsp),%xmm2
+ pand 416+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 320(%rsp),%xmm2
+ pand 320+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,0(%rdi)
+ movdqu %xmm3,16(%rdi)
+
+ movdqa %xmm5,%xmm0
+ movdqa %xmm5,%xmm1
+ pandn 256(%rsp),%xmm0
+ movdqa %xmm5,%xmm2
+ pandn 256+16(%rsp),%xmm1
+ movdqa %xmm5,%xmm3
+ pand 448(%rsp),%xmm2
+ pand 448+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+
+ movdqa %xmm4,%xmm0
+ movdqa %xmm4,%xmm1
+ pandn %xmm2,%xmm0
+ movdqa %xmm4,%xmm2
+ pandn %xmm3,%xmm1
+ movdqa %xmm4,%xmm3
+ pand 352(%rsp),%xmm2
+ pand 352+16(%rsp),%xmm3
+ por %xmm0,%xmm2
+ por %xmm1,%xmm3
+ movdqu %xmm2,32(%rdi)
+ movdqu %xmm3,48(%rdi)
+
+ addq $480+8,%rsp
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbx
+ popq %rbp
+ .byte 0xf3,0xc3
+.size ecp_nistz256_point_add_affinex,.-ecp_nistz256_point_add_affinex
OpenPOWER on IntegriCloud