Diffstat (limited to 'crypto/openssl/crypto/bn/asm')
-rwxr-xr-x  crypto/openssl/crypto/bn/asm/ppc-mont.pl       10
-rw-r--r--  crypto/openssl/crypto/bn/asm/ppc.pl            10
-rwxr-xr-x  crypto/openssl/crypto/bn/asm/ppc64-mont.pl     12
-rwxr-xr-x  crypto/openssl/crypto/bn/asm/x86-mont.pl       15
-rwxr-xr-x  crypto/openssl/crypto/bn/asm/x86_64-mont.pl    42
-rwxr-xr-x  crypto/openssl/crypto/bn/asm/x86_64-mont5.pl   61
6 files changed, 132 insertions, 18 deletions
diff --git a/crypto/openssl/crypto/bn/asm/ppc-mont.pl b/crypto/openssl/crypto/bn/asm/ppc-mont.pl
index da69c6a..6930a3a 100755
--- a/crypto/openssl/crypto/bn/asm/ppc-mont.pl
+++ b/crypto/openssl/crypto/bn/asm/ppc-mont.pl
@@ -191,7 +191,7 @@ L1st:
addi $j,$j,$BNSZ ; j++
addi $tp,$tp,$BNSZ ; tp++
- bdnz- L1st
+ bdnz L1st
;L1st
addc $lo0,$alo,$hi0
addze $hi0,$ahi
@@ -253,7 +253,7 @@ Linner:
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
addi $tp,$tp,$BNSZ ; tp++
- bdnz- Linner
+ bdnz Linner
;Linner
$LD $tj,$BNSZ($tp) ; tp[j]
addc $lo0,$alo,$hi0
@@ -276,7 +276,7 @@ Linner:
slwi $tj,$num,`log($BNSZ)/log(2)`
$UCMP $i,$tj
addi $i,$i,$BNSZ
- ble- Louter
+ ble Louter
addi $num,$num,2 ; restore $num
subfc $j,$j,$j ; j=0 and "clear" XER[CA]
@@ -289,7 +289,7 @@ Lsub: $LDX $tj,$tp,$j
subfe $aj,$nj,$tj ; tp[j]-np[j]
$STX $aj,$rp,$j
addi $j,$j,$BNSZ
- bdnz- Lsub
+ bdnz Lsub
li $j,0
mtctr $num
@@ -304,7 +304,7 @@ Lcopy: ; copy or in-place refresh
$STX $tj,$rp,$j
$STX $j,$tp,$j ; zap at once
addi $j,$j,$BNSZ
- bdnz- Lcopy
+ bdnz Lcopy
$POP $tj,0($sp)
li r3,1
diff --git a/crypto/openssl/crypto/bn/asm/ppc.pl b/crypto/openssl/crypto/bn/asm/ppc.pl
index 04df1fe..446d8ba 100644
--- a/crypto/openssl/crypto/bn/asm/ppc.pl
+++ b/crypto/openssl/crypto/bn/asm/ppc.pl
@@ -1556,7 +1556,7 @@ Lppcasm_sub_mainloop:
# if carry = 1 this is r7-r8. Else it
# is r7-r8 -1 as we need.
$STU r6,$BNSZ(r3)
- bdnz- Lppcasm_sub_mainloop
+ bdnz Lppcasm_sub_mainloop
Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
@@ -1603,7 +1603,7 @@ Lppcasm_add_mainloop:
$LDU r8,$BNSZ(r5)
adde r8,r7,r8
$STU r8,$BNSZ(r3)
- bdnz- Lppcasm_add_mainloop
+ bdnz Lppcasm_add_mainloop
Lppcasm_add_adios:
addze r3,r0 #return carry bit.
blr
@@ -1762,7 +1762,7 @@ Lppcasm_sqr_mainloop:
$UMULH r8,r6,r6
$STU r7,$BNSZ(r3)
$STU r8,$BNSZ(r3)
- bdnz- Lppcasm_sqr_mainloop
+ bdnz Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
blr
.long 0
@@ -1827,7 +1827,7 @@ Lppcasm_mw_LOOP:
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
- bdnz- Lppcasm_mw_LOOP
+ bdnz Lppcasm_mw_LOOP
Lppcasm_mw_REM:
andi. r5,r5,0x3
@@ -1951,7 +1951,7 @@ Lppcasm_maw_mainloop:
$ST r11,`3*$BNSZ`(r3)
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
- bdnz- Lppcasm_maw_mainloop
+ bdnz Lppcasm_maw_mainloop
Lppcasm_maw_leftover:
andi. r5,r5,0x3
diff --git a/crypto/openssl/crypto/bn/asm/ppc64-mont.pl b/crypto/openssl/crypto/bn/asm/ppc64-mont.pl
index 9e3c12d..595fc6d 100755
--- a/crypto/openssl/crypto/bn/asm/ppc64-mont.pl
+++ b/crypto/openssl/crypto/bn/asm/ppc64-mont.pl
@@ -734,7 +734,7 @@ $code.=<<___;
___
}
$code.=<<___;
- bdnz- L1st
+ bdnz L1st
fctid $dota,$dota
fctid $dotb,$dotb
@@ -1280,7 +1280,7 @@ $code.=<<___;
___
}
$code.=<<___;
- bdnz- Linner
+ bdnz Linner
fctid $dota,$dota
fctid $dotb,$dotb
@@ -1490,7 +1490,7 @@ Lsub: ldx $t0,$tp,$i
stdx $t0,$rp,$i
stdx $t2,$t6,$i
addi $i,$i,16
- bdnz- Lsub
+ bdnz Lsub
li $i,0
subfe $ovf,$i,$ovf ; handle upmost overflow bit
@@ -1517,7 +1517,7 @@ Lcopy: ; copy or in-place refresh
stdx $i,$tp,$i ; zap tp at once
stdx $i,$t4,$i
addi $i,$i,16
- bdnz- Lcopy
+ bdnz Lcopy
___
$code.=<<___ if ($SIZE_T==4);
subf $np,$num,$np ; rewind np
@@ -1550,7 +1550,7 @@ Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order
stw $t5,8($rp)
stw $t6,12($rp)
stwu $t7,16($rp)
- bdnz- Lsub
+ bdnz Lsub
li $i,0
subfe $ovf,$i,$ovf ; handle upmost overflow bit
@@ -1582,7 +1582,7 @@ Lcopy: ; copy or in-place refresh
stwu $t3,16($rp)
std $i,8($tp) ; zap tp at once
stdu $i,16($tp)
- bdnz- Lcopy
+ bdnz Lcopy
___
$code.=<<___;
diff --git a/crypto/openssl/crypto/bn/asm/x86-mont.pl b/crypto/openssl/crypto/bn/asm/x86-mont.pl
index e8f6b05..89f4de6 100755
--- a/crypto/openssl/crypto/bn/asm/x86-mont.pl
+++ b/crypto/openssl/crypto/bn/asm/x86-mont.pl
@@ -85,6 +85,21 @@ $frame=32; # size of above frame rounded up to 16n
&and ("esp",-64); # align to cache line
+ # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # physical memory in strictly sequential manner, i.e. if stack
+ # allocation spans two pages, then reference to farmost one can
+ # be punishable by SEGV. But page walking can do good even on
+ # other OSes, because it guarantees that villain thread hits
+ # the guard page before it can make damage to innocent one...
+ &mov ("eax","ebp");
+ &sub ("eax","esp");
+ &and ("eax",-4096);
+&set_label("page_walk");
+ &mov ("edx",&DWP(0,"esp","eax"));
+ &sub ("eax",4096);
+ &data_byte(0x2e);
+ &jnc (&label("page_walk"));
+
################################# load argument block...
&mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
&mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
diff --git a/crypto/openssl/crypto/bn/asm/x86_64-mont.pl b/crypto/openssl/crypto/bn/asm/x86_64-mont.pl
index 29ba122..8fb6c99 100755
--- a/crypto/openssl/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/openssl/crypto/bn/asm/x86_64-mont.pl
@@ -130,6 +130,20 @@ $code.=<<___;
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
+ # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # physical memory in strictly sequential manner, i.e. if stack
+ # allocation spans two pages, then reference to farmost one can
+ # be punishable by SEGV. But page walking can do good even on
+ # other OSes, because it guarantees that villain thread hits
+ # the guard page before it can make damage to innocent one...
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x66,0x2e # predict non-taken
+ jnc .Lmul_page_walk
+
mov $bp,%r12 # reassign $bp
___
$bp="%r12";
@@ -342,6 +356,14 @@ $code.=<<___;
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul4x_body:
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmul4x_page_walk
+
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
mov %rdx,%r12 # reassign $bp
___
@@ -795,6 +817,15 @@ bn_sqr8x_mont:
sub %r11,%rsp
.Lsqr8x_sp_done:
and \$-64,%rsp
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lsqr8x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lsqr8x_page_walk
+
mov $num,%r10
neg $num
@@ -932,8 +963,17 @@ bn_mulx4x_mont:
sub $num,%r10 # -$num
mov ($n0),$n0 # *n0
lea -72(%rsp,%r10),%rsp # alloca(frame+$num+8)
- lea ($bp,$num),%r10
and \$-128,%rsp
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmulx4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x66,0x2e # predict non-taken
+ jnc .Lmulx4x_page_walk
+
+ lea ($bp,$num),%r10
##############################################################
# Stack layout
# +0 num
diff --git a/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl b/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl
index 2e8c9db..938e170 100755
--- a/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/openssl/crypto/bn/asm/x86_64-mont5.pl
@@ -115,6 +115,20 @@ $code.=<<___;
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
+ # Some OSes, *cough*-dows, insist on stack being "wired" to
+ # physical memory in strictly sequential manner, i.e. if stack
+ # allocation spans two pages, then reference to farmost one can
+ # be punishable by SEGV. But page walking can do good even on
+ # other OSes, because it guarantees that villain thread hits
+ # the guard page before it can make damage to innocent one...
+ sub %rsp,%rax
+ and \$-4096,%rax
+.Lmul_page_walk:
+ mov (%rsp,%rax),%r11
+ sub \$4096,%rax
+ .byte 0x2e # predict non-taken
+ jnc .Lmul_page_walk
+
lea 128($bp),%r12 # reassign $bp (+size optimization)
___
$bp="%r12";
@@ -469,6 +483,15 @@ $code.=<<___;
sub %r11,%rsp
.Lmul4xsp_done:
and \$-64,%rsp
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmul4x_page_walk
+
neg $num
mov %rax,40(%rsp)
@@ -1058,6 +1081,15 @@ $code.=<<___;
sub %r11,%rsp
.Lpwr_sp_done:
and \$-64,%rsp
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lpwr_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lpwr_page_walk
+
mov $num,%r10
neg $num
@@ -2028,7 +2060,16 @@ bn_from_mont8x:
sub %r11,%rsp
.Lfrom_sp_done:
and \$-64,%rsp
- mov $num,%r10
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lfrom_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lfrom_page_walk
+
+ mov $num,%r10
neg $num
##############################################################
@@ -2173,6 +2214,15 @@ bn_mulx4x_mont_gather5:
sub %r11,%rsp
.Lmulx4xsp_done:
and \$-64,%rsp # ensure alignment
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmulx4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmulx4x_page_walk
+
##############################################################
# Stack layout
# +0 -num
@@ -2619,6 +2669,15 @@ bn_powerx5:
sub %r11,%rsp
.Lpwrx_sp_done:
and \$-64,%rsp
+ mov %rax,%r11
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lpwrx_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lpwrx_page_walk
+
mov $num,%r10
neg $num