Diffstat (limited to 'secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S')
 secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S | 189
 1 file changed, 94 insertions(+), 95 deletions(-)
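
This diff tracks an upstream OpenSSL correction to the generated P-256 (ecp_nistz256) amd64 assembly. It makes two related changes: the modular addition/doubling helpers now perform a strict reduction, so their results are always fully reduced mod p rather than merely less than 2^256, and the point-at-infinity tests in the point-addition routines are narrowed to look at the Z coordinate alone. Short notes on both patterns follow the relevant hunks below.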
diff --git a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
index 67c214e..194d556 100644
--- a/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
+++ b/secure/lib/libcrypto/amd64/ecp_nistz256-x86_64.S
@@ -29,6 +29,7 @@ ecp_nistz256_mul_by_2:
pushq %r13
movq 0(%rsi),%r8
+ xorq %r13,%r13
movq 8(%rsi),%r9
addq %r8,%r8
movq 16(%rsi),%r10
@@ -39,7 +40,7 @@ ecp_nistz256_mul_by_2:
adcq %r10,%r10
adcq %r11,%r11
movq %r9,%rdx
- sbbq %r13,%r13
+ adcq $0,%r13
subq 0(%rsi),%r8
movq %r10,%rcx
@@ -47,14 +48,14 @@ ecp_nistz256_mul_by_2:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
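
The hunks above show the core of the fix, as applied to ecp_nistz256_mul_by_2, which doubles a four-limb value modulo the P-256 prime p = 2^256 - 2^224 + 2^192 + 2^96 - 1. The old code captured the doubling's carry-out as an all-ones mask (sbbq %r13,%r13), computed sum - p, and kept the subtracted value only when that carry was set (testq/cmovzq); a sum in [p, 2^256) produces no carry and so was returned unreduced. The new code keeps the carry as a 0/1 counter (xorq %r13,%r13 up front, adcq $0,%r13 after the doubling), folds it into the borrow chain of the trial subtraction (sbbq $0,%r13), and selects with cmovcq, so the subtraction is undone only when the full 257-bit sum really was below p. A minimal C model of the two behaviours (helper names are illustrative, not part of this source):

    #include <stdint.h>

    /* P-256 prime, least-significant limb first:
     * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 */
    static const uint64_t P[4] = {
        0xffffffffffffffffULL, 0x00000000ffffffffULL,
        0x0000000000000000ULL, 0xffffffff00000001ULL
    };

    /* add with carry; *c is 0 or 1 on entry and exit */
    static uint64_t adc(uint64_t a, uint64_t b, uint64_t *c)
    {
        unsigned __int128 t = (unsigned __int128)a + b + *c;
        *c = (uint64_t)(t >> 64);
        return (uint64_t)t;
    }

    /* subtract with borrow; *b is 0 or 1 on entry and exit */
    static uint64_t sbb(uint64_t a, uint64_t s, uint64_t *b)
    {
        unsigned __int128 t = (unsigned __int128)a - s - *b;
        *b = (uint64_t)(t >> 64) & 1;
        return (uint64_t)t;
    }

    /* Old sequence: reduce only on carry out of bit 256, so a
     * doubling that lands in [p, 2^256) is returned unreduced. */
    static void mul_by_2_old(uint64_t r[4], const uint64_t a[4])
    {
        uint64_t s[4], t[4], carry = 0, borrow = 0;
        for (int i = 0; i < 4; i++)
            s[i] = adc(a[i], a[i], &carry);     /* s = 2a */
        for (int i = 0; i < 4; i++)
            t[i] = sbb(s[i], P[i], &borrow);    /* t = s - p */
        for (int i = 0; i < 4; i++)             /* cmovz on carry */
            r[i] = carry ? t[i] : s[i];
    }

    /* New sequence: the carry word joins the borrow chain
     * (sbbq $0,%r13), so the result is always fully reduced. */
    static void mul_by_2_fixed(uint64_t r[4], const uint64_t a[4])
    {
        uint64_t s[4], t[4], carry = 0, borrow = 0;
        for (int i = 0; i < 4; i++)
            s[i] = adc(a[i], a[i], &carry);     /* s = 2a, carry = bit 256 */
        for (int i = 0; i < 4; i++)
            t[i] = sbb(s[i], P[i], &borrow);    /* t = s - p */
        carry -= borrow;                        /* wraps iff (carry:s) < p */
        for (int i = 0; i < 4; i++)             /* cmovc: keep s only then */
            r[i] = (carry >> 63) ? s[i] : t[i];
    }

The same substitution is repeated through mul_by_3, add and the internal helpers in the hunks that follow.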
@@ -151,12 +152,12 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
- cmovzq %rcx,%r10
- cmovzq %r12,%r11
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
+ cmovcq %rcx,%r10
+ cmovcq %r12,%r11
xorq %r13,%r13
addq 0(%rsi),%r8
@@ -173,14 +174,14 @@ ecp_nistz256_mul_by_3:
sbbq $0,%r10
movq %r11,%r12
sbbq .Lpoly+24(%rip),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
@@ -219,14 +220,14 @@ ecp_nistz256_add:
sbbq 16(%rsi),%r10
movq %r11,%r12
sbbq 24(%rsi),%r11
- testq %r13,%r13
+ sbbq $0,%r13
- cmovzq %rax,%r8
- cmovzq %rdx,%r9
+ cmovcq %rax,%r8
+ cmovcq %rdx,%r9
movq %r8,0(%rdi)
- cmovzq %rcx,%r10
+ cmovcq %rcx,%r10
movq %r9,8(%rdi)
- cmovzq %r12,%r11
+ cmovcq %r12,%r11
movq %r10,16(%rdi)
movq %r11,24(%rdi)
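
A concrete failure case for the old sequence in ecp_nistz256_add, fixed by the hunk above: with inputs a = p - 1 and b = 1 the limb-wise sum is exactly p, which does not carry out of bit 256, so testq/cmovzq kept the unsubtracted value and the function returned p instead of the fully reduced 0. With sbbq $0 / cmovcq the subtraction of p survives whenever the sum is >= p, independently of the carry bit.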
@@ -1463,13 +1464,14 @@ ecp_nistz256_avx2_select_w7:
.type __ecp_nistz256_add_toq,@function
.align 32
__ecp_nistz256_add_toq:
+ xorq %r11,%r11
addq 0(%rbx),%r12
adcq 8(%rbx),%r13
movq %r12,%rax
adcq 16(%rbx),%r8
adcq 24(%rbx),%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1477,14 +1479,14 @@ __ecp_nistz256_add_toq:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
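
Note that in the internal helpers (__ecp_nistz256_add_toq above, __ecp_nistz256_mul_by_2q below) the modulus limbs are not read back from .Lpoly: subq $-1,%r12 subtracts the low limb 2^64 - 1, sbbq $0,%r8 covers the zero limb, and the remaining limbs come from registers the caller keeps live (%r15 supplies the top limb; the second-limb sbbq sits just outside the visible context). The carry-handling change itself is identical.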
@@ -1552,13 +1554,14 @@ __ecp_nistz256_subq:
.type __ecp_nistz256_mul_by_2q,@function
.align 32
__ecp_nistz256_mul_by_2q:
+ xorq %r11,%r11
addq %r12,%r12
adcq %r13,%r13
movq %r12,%rax
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -1566,14 +1569,14 @@ __ecp_nistz256_mul_by_2q:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
- cmovzq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -1811,16 +1814,14 @@ ecp_nistz256_point_add:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -1832,14 +1833,14 @@ ecp_nistz256_point_add:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-0(%rsi),%rsi
movq %rax,544+0(%rsp)
@@ -1850,8 +1851,8 @@ ecp_nistz256_point_add:
call __ecp_nistz256_sqr_montq
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
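
The hunks above begin the patch's second theme, in ecp_nistz256_point_add: the point-at-infinity flags used to be built by OR-ing together a point's X and Y words (the deleted por %xmm0,%xmm1 / por %xmm2,%xmm3 / por %xmm1,%xmm3 chain), so a point was flagged as infinity only in the all-zero affine-style encoding X = Y = 0. The new sequence ORs just the two halves of the Z coordinate instead (por %xmm4,%xmm5 for the first input; por %xmm0,%xmm1 over the words loaded from offsets 64 and 80 for the second) before the pshufd/pcmpeqd collapse into an all-zero test, so any representation with Z = 0 counts as the point at infinity, in line with the projective convention of the generic C path. (The relocated .byte 102,72,15,110,199 line is the raw encoding of movq %rdi,%xmm0, used to stash the result pointer; it is moved, not altered.) Roughly, in C:

    #include <stdint.h>

    /* Old test: infinity only for the all-zero encoding X = Y = 0. */
    static int is_infinity_old(const uint64_t x[4], const uint64_t y[4])
    {
        uint64_t acc = 0;
        for (int i = 0; i < 4; i++)
            acc |= x[i] | y[i];
        return acc == 0;
    }

    /* New test: infinity whenever Z is zero, regardless of X and Y. */
    static int is_infinity_new(const uint64_t z[4])
    {
        return (z[0] | z[1] | z[2] | z[3]) == 0;
    }

The same rewrite is applied to ecp_nistz256_point_add_affine and to the ADX point routines further down.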
@@ -2034,6 +2035,7 @@ ecp_nistz256_point_add:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -2041,7 +2043,7 @@ ecp_nistz256_point_add:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2049,15 +2051,15 @@ ecp_nistz256_point_add:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2215,16 +2217,14 @@ ecp_nistz256_point_add_affine:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -2342,6 +2342,7 @@ ecp_nistz256_point_add_affine:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -2349,7 +2350,7 @@ ecp_nistz256_point_add_affine:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -2357,15 +2358,15 @@ ecp_nistz256_point_add_affine:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subq
@@ -2512,14 +2513,14 @@ __ecp_nistz256_add_tox:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
@@ -2607,14 +2608,14 @@ __ecp_nistz256_mul_by_2x:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
+ sbbq $0,%r11
- btq $0,%r11
- cmovncq %rax,%r12
- cmovncq %rbp,%r13
+ cmovcq %rax,%r12
+ cmovcq %rbp,%r13
movq %r12,0(%rdi)
- cmovncq %rcx,%r8
+ cmovcq %rcx,%r8
movq %r13,8(%rdi)
- cmovncq %r10,%r9
+ cmovcq %r10,%r9
movq %r8,16(%rdi)
movq %r9,24(%rdi)
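
The x-suffixed ADX variants (__ecp_nistz256_add_tox above, __ecp_nistz256_mul_by_2x here, and the point routines below) carried the same flaw in a slightly different shape: the addition's carry word was probed with btq $0,%r11 and the unsubtracted value kept via cmovncq whenever the carry was clear. The fix is the same in spirit: sbbq $0,%r11 folds the carry word (maintained as a 0/1 counter earlier in the routine, outside the visible context) into the trial subtraction's borrow chain, and cmovcq selects on the resulting borrow.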
@@ -2844,16 +2845,14 @@ ecp_nistz256_point_addx:
movq %rdx,%rsi
movdqa %xmm0,384(%rsp)
movdqa %xmm1,384+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,416(%rsp)
movdqa %xmm3,416+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,448(%rsp)
movdqa %xmm5,448+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rsi),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
por %xmm3,%xmm5
@@ -2865,14 +2864,14 @@ ecp_nistz256_point_addx:
movdqa %xmm0,480(%rsp)
pshufd $0x1e,%xmm5,%xmm4
movdqa %xmm1,480+16(%rsp)
- por %xmm0,%xmm1
-.byte 102,72,15,110,199
+ movdqu 64(%rsi),%xmm0
+ movdqu 80(%rsi),%xmm1
movdqa %xmm2,512(%rsp)
movdqa %xmm3,512+16(%rsp)
- por %xmm2,%xmm3
por %xmm4,%xmm5
pxor %xmm4,%xmm4
- por %xmm1,%xmm3
+ por %xmm0,%xmm1
+.byte 102,72,15,110,199
leaq 64-128(%rsi),%rsi
movq %rdx,544+0(%rsp)
@@ -2883,8 +2882,8 @@ ecp_nistz256_point_addx:
call __ecp_nistz256_sqr_montx
pcmpeqd %xmm4,%xmm5
- pshufd $0xb1,%xmm3,%xmm4
- por %xmm3,%xmm4
+ pshufd $0xb1,%xmm1,%xmm4
+ por %xmm1,%xmm4
pshufd $0,%xmm5,%xmm5
pshufd $0x1e,%xmm4,%xmm3
por %xmm3,%xmm4
@@ -3067,6 +3066,7 @@ ecp_nistz256_point_addx:
+ xorq %r11,%r11
addq %r12,%r12
leaq 96(%rsp),%rsi
adcq %r13,%r13
@@ -3074,7 +3074,7 @@ ecp_nistz256_point_addx:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3082,15 +3082,15 @@ ecp_nistz256_point_addx:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx
@@ -3244,16 +3244,14 @@ ecp_nistz256_point_add_affinex:
movq 64+24(%rsi),%r8
movdqa %xmm0,320(%rsp)
movdqa %xmm1,320+16(%rsp)
- por %xmm0,%xmm1
movdqa %xmm2,352(%rsp)
movdqa %xmm3,352+16(%rsp)
- por %xmm2,%xmm3
movdqa %xmm4,384(%rsp)
movdqa %xmm5,384+16(%rsp)
- por %xmm1,%xmm3
+ por %xmm4,%xmm5
movdqu 0(%rbx),%xmm0
- pshufd $0xb1,%xmm3,%xmm5
+ pshufd $0xb1,%xmm5,%xmm3
movdqu 16(%rbx),%xmm1
movdqu 32(%rbx),%xmm2
por %xmm3,%xmm5
@@ -3371,6 +3369,7 @@ ecp_nistz256_point_add_affinex:
+ xorq %r11,%r11
addq %r12,%r12
leaq 192(%rsp),%rsi
adcq %r13,%r13
@@ -3378,7 +3377,7 @@ ecp_nistz256_point_add_affinex:
adcq %r8,%r8
adcq %r9,%r9
movq %r13,%rbp
- sbbq %r11,%r11
+ adcq $0,%r11
subq $-1,%r12
movq %r8,%rcx
@@ -3386,15 +3385,15 @@ ecp_nistz256_point_add_affinex:
sbbq $0,%r8
movq %r9,%r10
sbbq %r15,%r9
- testq %r11,%r11
+ sbbq $0,%r11
- cmovzq %rax,%r12
+ cmovcq %rax,%r12
movq 0(%rsi),%rax
- cmovzq %rbp,%r13
+ cmovcq %rbp,%r13
movq 8(%rsi),%rbp
- cmovzq %rcx,%r8
+ cmovcq %rcx,%r8
movq 16(%rsi),%rcx
- cmovzq %r10,%r9
+ cmovcq %r10,%r9
movq 24(%rsi),%r10
call __ecp_nistz256_subx