Diffstat (limited to 'secure/lib/libcrypto/amd64/rsaz-x86_64.S')
-rw-r--r--  secure/lib/libcrypto/amd64/rsaz-x86_64.S  657
1 file changed, 656 insertions(+), 1 deletion(-)
diff --git a/secure/lib/libcrypto/amd64/rsaz-x86_64.S b/secure/lib/libcrypto/amd64/rsaz-x86_64.S
index e2b0313..f09f499 100644
--- a/secure/lib/libcrypto/amd64/rsaz-x86_64.S
+++ b/secure/lib/libcrypto/amd64/rsaz-x86_64.S
@@ -1,4 +1,5 @@
- # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from rsaz-x86_64.pl.
.text
@@ -20,6 +21,10 @@ rsaz_512_sqr:
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Loop_sqrx
jmp .Loop_sqr
.align 32
@@ -383,6 +388,276 @@ rsaz_512_sqr:
decl %r8d
jnz .Loop_sqr
+ jmp .Lsqr_tail
+
+.align 32
+.Loop_sqrx:
+ movl %r8d,128+8(%rsp)
+.byte 102,72,15,110,199
+.byte 102,72,15,110,205
+
+ mulxq %rax,%r8,%r9
+
+ mulxq 16(%rsi),%rcx,%r10
+ xorq %rbp,%rbp
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rcx,%r9
+
+ mulxq 32(%rsi),%rcx,%r12
+ adcxq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rcx,%r11
+
+.byte 0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r12
+ adcxq %rcx,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
+ adcxq %rax,%r14
+ adcxq %rbp,%r15
+
+ movq %r9,%rcx
+ shldq $1,%r8,%r9
+ shlq $1,%r8
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rdx,%r8
+ movq 8(%rsi),%rdx
+ adcxq %rbp,%r9
+
+ movq %rax,(%rsp)
+ movq %r8,8(%rsp)
+
+
+ mulxq 16(%rsi),%rax,%rbx
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+
+.byte 0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00
+ adoxq %rdi,%r11
+ adcxq %r8,%r12
+
+ mulxq 32(%rsi),%rax,%rbx
+ adoxq %rax,%r12
+ adcxq %rbx,%r13
+
+ mulxq 40(%rsi),%rdi,%r8
+ adoxq %rdi,%r13
+ adcxq %r8,%r14
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r14
+ adcxq %rbx,%r15
+
+.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r15
+ adcxq %rbp,%r8
+ adoxq %rbp,%r8
+
+ movq %r11,%rbx
+ shldq $1,%r10,%r11
+ shldq $1,%rcx,%r10
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rcx
+ movq 16(%rsi),%rdx
+ adcxq %rax,%r9
+ adcxq %rcx,%r10
+ adcxq %rbp,%r11
+
+ movq %r9,16(%rsp)
+.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00
+
+
+.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00
+ adoxq %rdi,%r12
+ adcxq %r9,%r13
+
+ mulxq 32(%rsi),%rax,%rcx
+ adoxq %rax,%r13
+ adcxq %rcx,%r14
+
+ mulxq 40(%rsi),%rdi,%r9
+ adoxq %rdi,%r14
+ adcxq %r9,%r15
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r15
+ adcxq %rcx,%r8
+
+.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r8
+ adcxq %rbp,%r9
+ adoxq %rbp,%r9
+
+ movq %r13,%rcx
+ shldq $1,%r12,%r13
+ shldq $1,%rbx,%r12
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r11
+ adcxq %rdx,%r12
+ movq 24(%rsi),%rdx
+ adcxq %rbp,%r13
+
+ movq %r11,32(%rsp)
+.byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00
+
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00
+ adoxq %rax,%r14
+ adcxq %rbx,%r15
+
+ mulxq 40(%rsi),%rdi,%r10
+ adoxq %rdi,%r15
+ adcxq %r10,%r8
+
+ mulxq 48(%rsi),%rax,%rbx
+ adoxq %rax,%r8
+ adcxq %rbx,%r9
+
+ mulxq 56(%rsi),%rdi,%r10
+ adoxq %rdi,%r9
+ adcxq %rbp,%r10
+ adoxq %rbp,%r10
+
+.byte 0x66
+ movq %r15,%rbx
+ shldq $1,%r14,%r15
+ shldq $1,%rcx,%r14
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r13
+ adcxq %rdx,%r14
+ movq 32(%rsi),%rdx
+ adcxq %rbp,%r15
+
+ movq %r13,48(%rsp)
+ movq %r14,56(%rsp)
+
+
+.byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00
+ adoxq %rdi,%r8
+ adcxq %r11,%r9
+
+ mulxq 48(%rsi),%rax,%rcx
+ adoxq %rax,%r9
+ adcxq %rcx,%r10
+
+ mulxq 56(%rsi),%rdi,%r11
+ adoxq %rdi,%r10
+ adcxq %rbp,%r11
+ adoxq %rbp,%r11
+
+ movq %r9,%rcx
+ shldq $1,%r8,%r9
+ shldq $1,%rbx,%r8
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r15
+ adcxq %rdx,%r8
+ movq 40(%rsi),%rdx
+ adcxq %rbp,%r9
+
+ movq %r15,64(%rsp)
+ movq %r8,72(%rsp)
+
+
+.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
+ adoxq %rax,%r10
+ adcxq %rbx,%r11
+
+.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
+ adoxq %rdi,%r11
+ adcxq %rbp,%r12
+ adoxq %rbp,%r12
+
+ movq %r11,%rbx
+ shldq $1,%r10,%r11
+ shldq $1,%rcx,%r10
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r9
+ adcxq %rdx,%r10
+ movq 48(%rsi),%rdx
+ adcxq %rbp,%r11
+
+ movq %r9,80(%rsp)
+ movq %r10,88(%rsp)
+
+
+.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
+ adoxq %rax,%r12
+ adoxq %rbp,%r13
+
+ xorq %r14,%r14
+ shldq $1,%r13,%r14
+ shldq $1,%r12,%r13
+ shldq $1,%rbx,%r12
+
+ xorl %ebp,%ebp
+ mulxq %rdx,%rax,%rdx
+ adcxq %rax,%r11
+ adcxq %rdx,%r12
+ movq 56(%rsi),%rdx
+ adcxq %rbp,%r13
+
+.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
+.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00
+
+
+ mulxq %rdx,%rax,%rdx
+ adoxq %rax,%r13
+ adoxq %rbp,%rdx
+
+.byte 0x66
+ addq %rdx,%r14
+
+ movq %r13,112(%rsp)
+ movq %r14,120(%rsp)
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+ addq 64(%rsp),%r8
+ adcq 72(%rsp),%r9
+ adcq 80(%rsp),%r10
+ adcq 88(%rsp),%r11
+ adcq 96(%rsp),%r12
+ adcq 104(%rsp),%r13
+ adcq 112(%rsp),%r14
+ adcq 120(%rsp),%r15
+ sbbq %rcx,%rcx
+
+ call __rsaz_512_subtract
+
+ movq %r8,%rdx
+ movq %r9,%rax
+ movl 128+8(%rsp),%r8d
+ movq %rdi,%rsi
+
+ decl %r8d
+ jnz .Loop_sqrx
+
+.Lsqr_tail:
leaq 128+24+48(%rsp),%rax
movq -48(%rax),%r15
@@ -411,6 +686,10 @@ rsaz_512_mul:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul
@@ -428,6 +707,29 @@ rsaz_512_mul:
movq 56(%rsp),%r15
call __rsaz_512_reduce
+ jmp .Lmul_tail
+
+.align 32
+.Lmulx:
+ movq %rdx,%rbp
+ movq (%rdx),%rdx
+ call __rsaz_512_mulx
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -518,6 +820,10 @@ rsaz_512_mul_gather4:
por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx_gather
.byte 102,76,15,126,195
movq %r8,128(%rsp)
@@ -698,6 +1004,142 @@ rsaz_512_mul_gather4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
+ jmp .Lmul_gather_tail
+
+.align 32
+.Lmulx_gather:
+.byte 102,76,15,126,194
+
+ movq %r8,128(%rsp)
+ movq %rdi,128+8(%rsp)
+ movq %rcx,128+16(%rsp)
+
+ mulxq (%rsi),%rbx,%r8
+ movq %rbx,(%rsp)
+ xorl %edi,%edi
+
+ mulxq 8(%rsi),%rax,%r9
+
+ mulxq 16(%rsi),%rbx,%r10
+ adcxq %rax,%r8
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rbx,%r9
+
+ mulxq 32(%rsi),%rbx,%r12
+ adcxq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rbx,%r11
+
+ mulxq 48(%rsi),%rbx,%r14
+ adcxq %rax,%r12
+
+ mulxq 56(%rsi),%rax,%r15
+ adcxq %rbx,%r13
+ adcxq %rax,%r14
+.byte 0x67
+ movq %r8,%rbx
+ adcxq %rdi,%r15
+
+ movq $-7,%rcx
+ jmp .Loop_mulx_gather
+
+.align 32
+.Loop_mulx_gather:
+ movdqa 0(%rbp),%xmm8
+ movdqa 16(%rbp),%xmm9
+ movdqa 32(%rbp),%xmm10
+ movdqa 48(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 64(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 80(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 96(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 112(%rbp),%xmm15
+ leaq 128(%rbp),%rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd $0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+.byte 102,76,15,126,194
+
+.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+ mulxq 8(%rsi),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rsi),%rax,%r10
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+ mulxq 32(%rsi),%rax,%r12
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+.byte 0x67
+ adoxq %r15,%r14
+
+ mulxq 56(%rsi),%rax,%r15
+ movq %rbx,64(%rsp,%rcx,8)
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ movq %r8,%rbx
+ adcxq %rdi,%r15
+
+ incq %rcx
+ jnz .Loop_mulx_gather
+
+ movq %r8,64(%rsp)
+ movq %r9,64+8(%rsp)
+ movq %r10,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+ movq %r12,64+32(%rsp)
+ movq %r13,64+40(%rsp)
+ movq %r14,64+48(%rsp)
+ movq %r15,64+56(%rsp)
+
+ movq 128(%rsp),%rdx
+ movq 128+8(%rsp),%rdi
+ movq 128+16(%rsp),%rbp
+
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -742,6 +1184,10 @@ rsaz_512_mul_scatter4:
movq %rcx,128(%rsp)
movq %rdi,%rbp
+ movl $0x80100,%r11d
+ andl OPENSSL_ia32cap_P+8(%rip),%r11d
+ cmpl $0x80100,%r11d
+ je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul
@@ -758,6 +1204,29 @@ rsaz_512_mul_scatter4:
movq 56(%rsp),%r15
call __rsaz_512_reduce
+ jmp .Lmul_scatter_tail
+
+.align 32
+.Lmulx_scatter:
+ movq (%rdi),%rdx
+ call __rsaz_512_mulx
+
+.byte 102,72,15,126,199
+.byte 102,72,15,126,205
+
+ movq 128(%rsp),%rdx
+ movq (%rsp),%r8
+ movq 8(%rsp),%r9
+ movq 16(%rsp),%r10
+ movq 24(%rsp),%r11
+ movq 32(%rsp),%r12
+ movq 40(%rsp),%r13
+ movq 48(%rsp),%r14
+ movq 56(%rsp),%r15
+
+ call __rsaz_512_reducex
+
+.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
@@ -804,6 +1273,7 @@ rsaz_512_mul_by_one:
subq $128+24,%rsp
.Lmul_by_one_body:
+ movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)
@@ -824,7 +1294,16 @@ rsaz_512_mul_by_one:
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
+ andl $0x80100,%eax
+ cmpl $0x80100,%eax
+ je .Lby_one_callx
call __rsaz_512_reduce
+ jmp .Lby_one_tail
+.align 32
+.Lby_one_callx:
+ movq 128(%rsp),%rdx
+ call __rsaz_512_reducex
+.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
@@ -928,6 +1407,62 @@ __rsaz_512_reduce:
.byte 0xf3,0xc3
.size __rsaz_512_reduce,.-__rsaz_512_reduce
+.type __rsaz_512_reducex,@function
+.align 32
+__rsaz_512_reducex:
+
+ imulq %r8,%rdx
+ xorq %rsi,%rsi
+ movl $8,%ecx
+ jmp .Lreduction_loopx
+
+.align 32
+.Lreduction_loopx:
+ movq %r8,%rbx
+ mulxq 0(%rbp),%rax,%r8
+ adcxq %rbx,%rax
+ adoxq %r9,%r8
+
+ mulxq 8(%rbp),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rbp),%rbx,%r10
+ adcxq %rbx,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rbp),%rbx,%r11
+ adcxq %rbx,%r10
+ adoxq %r12,%r11
+
+.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
+ movq %rdx,%rax
+ movq %r8,%rdx
+ adcxq %rbx,%r11
+ adoxq %r13,%r12
+
+ mulxq 128+8(%rsp),%rbx,%rdx
+ movq %rax,%rdx
+
+ mulxq 40(%rbp),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+ mulxq 56(%rbp),%rax,%r15
+ movq %rbx,%rdx
+ adcxq %rax,%r14
+ adoxq %rsi,%r15
+ adcxq %rsi,%r15
+
+ decl %ecx
+ jne .Lreduction_loopx
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_reducex,.-__rsaz_512_reducex
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
@@ -1127,6 +1662,126 @@ __rsaz_512_mul:
.byte 0xf3,0xc3
.size __rsaz_512_mul,.-__rsaz_512_mul
+.type __rsaz_512_mulx,@function
+.align 32
+__rsaz_512_mulx:
+ mulxq (%rsi),%rbx,%r8
+ movq $-6,%rcx
+
+ mulxq 8(%rsi),%rax,%r9
+ movq %rbx,8(%rsp)
+
+ mulxq 16(%rsi),%rbx,%r10
+ adcq %rax,%r8
+
+ mulxq 24(%rsi),%rax,%r11
+ adcq %rbx,%r9
+
+ mulxq 32(%rsi),%rbx,%r12
+ adcq %rax,%r10
+
+ mulxq 40(%rsi),%rax,%r13
+ adcq %rbx,%r11
+
+ mulxq 48(%rsi),%rbx,%r14
+ adcq %rax,%r12
+
+ mulxq 56(%rsi),%rax,%r15
+ movq 8(%rbp),%rdx
+ adcq %rbx,%r13
+ adcq %rax,%r14
+ adcq $0,%r15
+
+ xorq %rdi,%rdi
+ jmp .Loop_mulx
+
+.align 32
+.Loop_mulx:
+ movq %r8,%rbx
+ mulxq (%rsi),%rax,%r8
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+ mulxq 8(%rsi),%rax,%r9
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+ mulxq 16(%rsi),%rax,%r10
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+ mulxq 48(%rsi),%rax,%r14
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+ mulxq 56(%rsi),%rax,%r15
+ movq 64(%rbp,%rcx,8),%rdx
+ movq %rbx,8+64-8(%rsp,%rcx,8)
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ adcxq %rdi,%r15
+
+ incq %rcx
+ jnz .Loop_mulx
+
+ movq %r8,%rbx
+ mulxq (%rsi),%rax,%r8
+ adcxq %rax,%rbx
+ adoxq %r9,%r8
+
+.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
+ adcxq %rax,%r8
+ adoxq %r10,%r9
+
+.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
+ adcxq %rax,%r9
+ adoxq %r11,%r10
+
+ mulxq 24(%rsi),%rax,%r11
+ adcxq %rax,%r10
+ adoxq %r12,%r11
+
+ mulxq 32(%rsi),%rax,%r12
+ adcxq %rax,%r11
+ adoxq %r13,%r12
+
+ mulxq 40(%rsi),%rax,%r13
+ adcxq %rax,%r12
+ adoxq %r14,%r13
+
+.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
+ adcxq %rax,%r13
+ adoxq %r15,%r14
+
+.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
+ adcxq %rax,%r14
+ adoxq %rdi,%r15
+ adcxq %rdi,%r15
+
+ movq %rbx,8+64-8(%rsp)
+ movq %r8,8+64(%rsp)
+ movq %r9,8+64+8(%rsp)
+ movq %r10,8+64+16(%rsp)
+ movq %r11,8+64+24(%rsp)
+ movq %r12,8+64+32(%rsp)
+ movq %r13,8+64+40(%rsp)
+ movq %r14,8+64+48(%rsp)
+ movq %r15,8+64+56(%rsp)
+
+ .byte 0xf3,0xc3
+.size __rsaz_512_mulx,.-__rsaz_512_mulx
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16
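The new MULX/ADX paths added above (.Loop_sqrx, .Lmulx, .Lmulx_gather, .Lmulx_scatter, __rsaz_512_reducex, __rsaz_512_mulx) are selected by testing the mask 0x80100 against OPENSSL_ia32cap_P+8, which caches the CPUID leaf-7 EBX word: bit 8 is BMI2 (MULX) and bit 19 is ADX (ADCX/ADOX), and both must be set before the mulx/adcx/adox routines are used. Below is a minimal standalone sketch of an equivalent capability check, assuming a GCC/Clang toolchain with <cpuid.h>; it is an illustration of the gate, not OpenSSL's own probe.

/* Sketch only: mirrors the 0x80100 feature test in the patch above. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* CPUID leaf 7, sub-leaf 0: structured extended feature flags. */
    if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
        puts("CPUID leaf 7 not supported");
        return 1;
    }

    /* EBX bit 8 = BMI2 (MULX), bit 19 = ADX (ADCX/ADOX): together 0x80100. */
    const unsigned int mask = (1u << 8) | (1u << 19);
    if ((ebx & mask) == mask)
        puts("MULX/ADX code path would be taken");
    else
        puts("baseline code path would be taken");
    return 0;
}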