diff options
Diffstat (limited to 'crypto/openssl/crypto/bn/asm/x86-mont.pl')
-rwxr-xr-x | crypto/openssl/crypto/bn/asm/x86-mont.pl | 41 |
1 files changed, 24 insertions, 17 deletions
diff --git a/crypto/openssl/crypto/bn/asm/x86-mont.pl b/crypto/openssl/crypto/bn/asm/x86-mont.pl index 89f4de6..1c4003e 100755 --- a/crypto/openssl/crypto/bn/asm/x86-mont.pl +++ b/crypto/openssl/crypto/bn/asm/x86-mont.pl @@ -63,27 +63,26 @@ $frame=32; # size of above frame rounded up to 16n &lea ("esi",&wparam(0)); # put aside pointer to argument block &lea ("edx",&wparam(1)); # load ap - &mov ("ebp","esp"); # saved stack pointer! &add ("edi",2); # extra two words on top of tp &neg ("edi"); - &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2)) + &lea ("ebp",&DWP(-$frame,"esp","edi",4)); # future alloca($frame+4*(num+2)) &neg ("edi"); # minimize cache contention by arraning 2K window between stack # pointer and ap argument [np is also position sensitive vector, # but it's assumed to be near ap, as it's allocated at ~same # time]. - &mov ("eax","esp"); + &mov ("eax","ebp"); &sub ("eax","edx"); &and ("eax",2047); - &sub ("esp","eax"); # this aligns sp and ap modulo 2048 + &sub ("ebp","eax"); # this aligns sp and ap modulo 2048 - &xor ("edx","esp"); + &xor ("edx","ebp"); &and ("edx",2048); &xor ("edx",2048); - &sub ("esp","edx"); # this splits them apart modulo 4096 + &sub ("ebp","edx"); # this splits them apart modulo 4096 - &and ("esp",-64); # align to cache line + &and ("ebp",-64); # align to cache line # Some OSes, *cough*-dows, insist on stack being "wired" to # physical memory in strictly sequential manner, i.e. if stack @@ -91,20 +90,28 @@ $frame=32; # size of above frame rounded up to 16n # be punishable by SEGV. But page walking can do good even on # other OSes, because it guarantees that villain thread hits # the guard page before it can make damage to innocent one... - &mov ("eax","ebp"); - &sub ("eax","esp"); + &mov ("eax","esp"); + &sub ("eax","ebp"); &and ("eax",-4096); -&set_label("page_walk"); - &mov ("edx",&DWP(0,"esp","eax")); - &sub ("eax",4096); - &data_byte(0x2e); - &jnc (&label("page_walk")); + &mov ("edx","esp"); # saved stack pointer! + &lea ("esp",&DWP(0,"ebp","eax")); + &mov ("eax",&DWP(0,"esp")); + &cmp ("esp","ebp"); + &ja (&label("page_walk")); + &jmp (&label("page_walk_done")); + +&set_label("page_walk",16); + &lea ("esp",&DWP(-4096,"esp")); + &mov ("eax",&DWP(0,"esp")); + &cmp ("esp","ebp"); + &ja (&label("page_walk")); +&set_label("page_walk_done"); ################################# load argument block... &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp - &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np + &mov ("ebp",&DWP(3*4,"esi"));# const BN_ULONG *np &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 #&mov ("edi",&DWP(5*4,"esi"));# int num @@ -112,11 +119,11 @@ $frame=32; # size of above frame rounded up to 16n &mov ($_rp,"eax"); # ... save a copy of argument block &mov ($_ap,"ebx"); &mov ($_bp,"ecx"); - &mov ($_np,"edx"); + &mov ($_np,"ebp"); &mov ($_n0,"esi"); &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling #&mov ($_num,$num); # redundant as $num is not reused - &mov ($_sp,"ebp"); # saved stack pointer! + &mov ($_sp,"edx"); # saved stack pointer! if($sse2) { $acc0="mm0"; # mmx register bank layout |