diff options
Diffstat (limited to 'crypto/sha/asm/sha512-x86_64.pl')
-rwxr-xr-x | crypto/sha/asm/sha512-x86_64.pl | 226 |
1 files changed, 166 insertions, 60 deletions
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl index b6252d3..f611a2d 100755 --- a/crypto/sha/asm/sha512-x86_64.pl +++ b/crypto/sha/asm/sha512-x86_64.pl @@ -40,14 +40,18 @@ # sha256_block:-( This is presumably because 64-bit shifts/rotates # apparently are not atomic instructions, but implemented in microcode. -$output=shift; +$flavour = shift; +$output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $output"; +open STDOUT,"| $^X $xlate $flavour $output"; if ($output =~ /512/) { $func="sha512_block_data_order"; @@ -91,50 +95,44 @@ sub ROUND_00_15() { my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_; $code.=<<___; - mov $e,$a0 - mov $e,$a1 + ror \$`$Sigma1[2]-$Sigma1[1]`,$a0 mov $f,$a2 + mov $T1,`$SZ*($i&0xf)`(%rsp) - ror \$$Sigma1[0],$a0 - ror \$$Sigma1[1],$a1 + ror \$`$Sigma0[2]-$Sigma0[1]`,$a1 + xor $e,$a0 xor $g,$a2 # f^g - xor $a1,$a0 - ror \$`$Sigma1[2]-$Sigma1[1]`,$a1 + ror \$`$Sigma1[1]-$Sigma1[0]`,$a0 + add $h,$T1 # T1+=h + xor $a,$a1 + + add ($Tbl,$round,$SZ),$T1 # T1+=K[round] and $e,$a2 # (f^g)&e - mov $T1,`$SZ*($i&0xf)`(%rsp) + mov $b,$h - xor $a1,$a0 # Sigma1(e) + ror \$`$Sigma0[1]-$Sigma0[0]`,$a1 + xor $e,$a0 xor $g,$a2 # Ch(e,f,g)=((f^g)&e)^g - add $h,$T1 # T1+=h - - mov $a,$h - add $a0,$T1 # T1+=Sigma1(e) + xor $c,$h # b^c + xor $a,$a1 add $a2,$T1 # T1+=Ch(e,f,g) - mov $a,$a0 - mov $a,$a1 + mov $b,$a2 - ror \$$Sigma0[0],$h - ror \$$Sigma0[1],$a0 - mov $a,$a2 - add ($Tbl,$round,$SZ),$T1 # T1+=K[round] + ror \$$Sigma1[0],$a0 # Sigma1(e) + and $a,$h # h=(b^c)&a + and $c,$a2 # b&c - xor $a0,$h - ror \$`$Sigma0[2]-$Sigma0[1]`,$a0 - or $c,$a1 # a|c + ror \$$Sigma0[0],$a1 # Sigma0(a) + add $a0,$T1 # T1+=Sigma1(e) + add $a2,$h # h+=b&c (completes +=Maj(a,b,c) - xor $a0,$h # h=Sigma0(a) - and $c,$a2 # a&c add $T1,$d # d+=T1 - - and $b,$a1 # (a|c)&b add $T1,$h # h+=T1 - - or $a2,$a1 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1($round),$round # round++ + add $a1,$h # h+=Sigma0(a) - add $a1,$h # h+=Maj(a,b,c) ___ } @@ -143,32 +141,30 @@ sub ROUND_16_XX() $code.=<<___; mov `$SZ*(($i+1)&0xf)`(%rsp),$a0 - mov `$SZ*(($i+14)&0xf)`(%rsp),$T1 - - mov $a0,$a2 + mov `$SZ*(($i+14)&0xf)`(%rsp),$a1 + mov $a0,$T1 + mov $a1,$a2 + ror \$`$sigma0[1]-$sigma0[0]`,$T1 + xor $a0,$T1 shr \$$sigma0[2],$a0 - ror \$$sigma0[0],$a2 - - xor $a2,$a0 - ror \$`$sigma0[1]-$sigma0[0]`,$a2 - - xor $a2,$a0 # sigma0(X[(i+1)&0xf]) - mov $T1,$a1 - shr \$$sigma1[2],$T1 - ror \$$sigma1[0],$a1 + ror \$$sigma0[0],$T1 + xor $T1,$a0 # sigma0(X[(i+1)&0xf]) + mov `$SZ*(($i+9)&0xf)`(%rsp),$T1 - xor $a1,$T1 - ror \$`$sigma1[1]-$sigma1[0]`,$a1 - - xor $a1,$T1 # sigma1(X[(i+14)&0xf]) + ror \$`$sigma1[1]-$sigma1[0]`,$a2 + xor $a1,$a2 + shr \$$sigma1[2],$a1 + ror \$$sigma1[0],$a2 add $a0,$T1 - - add `$SZ*(($i+9)&0xf)`(%rsp),$T1 + xor $a2,$a1 # sigma1(X[(i+14)&0xf]) add `$SZ*($i&0xf)`(%rsp),$T1 + mov $e,$a0 + add $a1,$T1 + mov $a,$a1 ___ &ROUND_00_15(@_); } @@ -186,7 +182,7 @@ $func: push %r13 push %r14 push %r15 - mov %rsp,%rbp # copy %rsp + mov %rsp,%r11 # copy %rsp shl \$4,%rdx # num*16 sub \$$framesz,%rsp lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ @@ -194,10 +190,10 @@ $func: mov $ctx,$_ctx # save ctx, 1st arg mov $inp,$_inp # save inp, 2nd arh mov %rdx,$_end # save end pointer, "3rd" arg - mov %rbp,$_rsp # save copy of %rsp + mov %r11,$_rsp # save copy of %rsp +.Lprologue: - .picmeup $Tbl - lea $TABLE-.($Tbl),$Tbl + lea $TABLE(%rip),$Tbl mov $SZ*0($ctx),$A mov $SZ*1($ctx),$B @@ -215,6 +211,8 @@ $func: ___ for($i=0;$i<16;$i++) { $code.=" mov $SZ*$i($inp),$T1\n"; + $code.=" mov @ROT[4],$a0\n"; + $code.=" mov @ROT[0],$a1\n"; $code.=" bswap $T1\n"; &ROUND_00_15($i,@ROT); unshift(@ROT,pop(@ROT)); @@ -257,14 +255,15 @@ $code.=<<___; mov $H,$SZ*7($ctx) jb .Lloop - mov $_rsp,%rsp - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp - pop %rbx - + mov $_rsp,%rsi + mov (%rsi),%r15 + mov 8(%rsi),%r14 + mov 16(%rsi),%r13 + mov 24(%rsi),%r12 + mov 32(%rsi),%rbp + mov 40(%rsi),%rbx + lea 48(%rsi),%rsp +.Lepilogue: ret .size $func,.-$func ___ @@ -339,6 +338,113 @@ $TABLE: ___ } +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern __imp_RtlVirtualUnwind +.type se_handler,\@abi-omnipotent +.align 16 +se_handler: + push %rsi + push %rdi + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + pushfq + sub \$64,%rsp + + mov 120($context),%rax # pull context->Rax + mov 248($context),%rbx # pull context->Rip + + lea .Lprologue(%rip),%r10 + cmp %r10,%rbx # context->Rip<.Lprologue + jb .Lin_prologue + + mov 152($context),%rax # pull context->Rsp + + lea .Lepilogue(%rip),%r10 + cmp %r10,%rbx # context->Rip>=.Lepilogue + jae .Lin_prologue + + mov 16*$SZ+3*8(%rax),%rax # pull $_rsp + lea 48(%rax),%rax + + mov -8(%rax),%rbx + mov -16(%rax),%rbp + mov -24(%rax),%r12 + mov -32(%rax),%r13 + mov -40(%rax),%r14 + mov -48(%rax),%r15 + mov %rbx,144($context) # restore context->Rbx + mov %rbp,160($context) # restore context->Rbp + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 + +.Lin_prologue: + mov 8(%rax),%rdi + mov 16(%rax),%rsi + mov %rax,152($context) # restore context->Rsp + mov %rsi,168($context) # restore context->Rsi + mov %rdi,176($context) # restore context->Rdi + + mov 40($disp),%rdi # disp->ContextRecord + mov $context,%rsi # context + mov \$154,%ecx # sizeof(CONTEXT) + .long 0xa548f3fc # cld; rep movsq + + mov $disp,%rsi + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER + mov 8(%rsi),%rdx # arg2, disp->ImageBase + mov 0(%rsi),%r8 # arg3, disp->ControlPc + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry + mov 40(%rsi),%r10 # disp->ContextRecord + lea 56(%rsi),%r11 # &disp->HandlerData + lea 24(%rsi),%r12 # &disp->EstablisherFrame + mov %r10,32(%rsp) # arg5 + mov %r11,40(%rsp) # arg6 + mov %r12,48(%rsp) # arg7 + mov %rcx,56(%rsp) # arg8, (NULL) + call *__imp_RtlVirtualUnwind(%rip) + + mov \$1,%eax # ExceptionContinueSearch + add \$64,%rsp + popfq + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + pop %rdi + pop %rsi + ret +.size se_handler,.-se_handler + +.section .pdata +.align 4 + .rva .LSEH_begin_$func + .rva .LSEH_end_$func + .rva .LSEH_info_$func + +.section .xdata +.align 8 +.LSEH_info_$func: + .byte 9,0,0,0 + .rva se_handler +___ +} + $code =~ s/\`([^\`]*)\`/eval $1/gem; print $code; close STDOUT; |