summaryrefslogtreecommitdiffstats
path: root/crypto/bn/asm/bn-alpha.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/bn-alpha.pl')
-rw-r--r--crypto/bn/asm/bn-alpha.pl571
1 files changed, 571 insertions, 0 deletions
diff --git a/crypto/bn/asm/bn-alpha.pl b/crypto/bn/asm/bn-alpha.pl
new file mode 100644
index 0000000..302edf2
--- /dev/null
+++ b/crypto/bn/asm/bn-alpha.pl
@@ -0,0 +1,571 @@
+#!/usr/local/bin/perl
+# I have this in perl so I can use more usefull register names and then convert
+# them into alpha registers.
+#
+
+$d=&data();
+$d =~ s/CC/0/g;
+$d =~ s/R1/1/g;
+$d =~ s/R2/2/g;
+$d =~ s/R3/3/g;
+$d =~ s/R4/4/g;
+$d =~ s/L1/5/g;
+$d =~ s/L2/6/g;
+$d =~ s/L3/7/g;
+$d =~ s/L4/8/g;
+$d =~ s/O1/22/g;
+$d =~ s/O2/23/g;
+$d =~ s/O3/24/g;
+$d =~ s/O4/25/g;
+$d =~ s/A1/20/g;
+$d =~ s/A2/21/g;
+$d =~ s/A3/27/g;
+$d =~ s/A4/28/g;
+if (0){
+}
+
+print $d;
+
+sub data
+ {
+ local($data)=<<'EOF';
+
+ # DEC Alpha assember
+ # The bn_div_words is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div_words.
+ # I've gone back and re-done most of routines.
+ # The key thing to remeber for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've done modification to help
+ # improve this. Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+ .file 1 "bn_asm.c"
+ .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text
+ .align 3
+ .globl bn_mul_add_words
+ .ent bn_mul_add_words
+bn_mul_add_words:
+bn_mul_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$CC
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ ldq $R1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $A1,$19,$L1 # 1 2 1 ######
+ ldq $A2,8($17) # 2 1
+ ldq $R2,8($16) # 2 1
+ umulh $A1,$19,$A1 # 1 2 ######
+ ldq $A3,16($17) # 3 1
+ ldq $R3,16($16) # 3 1
+ mulq $A2,$19,$L2 # 2 2 1 ######
+ ldq $A4,24($17) # 4 1
+ addq $R1,$L1,$R1 # 1 2 2
+ ldq $R4,24($16) # 4 1
+ umulh $A2,$19,$A2 # 2 2 ######
+ cmpult $R1,$L1,$O1 # 1 2 3 1
+ addq $A1,$O1,$A1 # 1 3 1
+ addq $R1,$CC,$R1 # 1 2 3 1
+ mulq $A3,$19,$L3 # 3 2 1 ######
+ cmpult $R1,$CC,$CC # 1 2 3 2
+ addq $R2,$L2,$R2 # 2 2 2
+ addq $A1,$CC,$CC # 1 3 2
+ cmpult $R2,$L2,$O2 # 2 2 3 1
+ addq $A2,$O2,$A2 # 2 3 1
+ umulh $A3,$19,$A3 # 3 2 ######
+ addq $R2,$CC,$R2 # 2 2 3 1
+ cmpult $R2,$CC,$CC # 2 2 3 2
+ subq $18,4,$18
+ mulq $A4,$19,$L4 # 4 2 1 ######
+ addq $A2,$CC,$CC # 2 3 2
+ addq $R3,$L3,$R3 # 3 2 2
+ addq $16,32,$16
+ cmpult $R3,$L3,$O3 # 3 2 3 1
+ stq $R1,-32($16) # 1 2 4
+ umulh $A4,$19,$A4 # 4 2 ######
+ addq $A3,$O3,$A3 # 3 3 1
+ addq $R3,$CC,$R3 # 3 2 3 1
+ stq $R2,-24($16) # 2 2 4
+ cmpult $R3,$CC,$CC # 3 2 3 2
+ stq $R3,-16($16) # 3 2 4
+ addq $R4,$L4,$R4 # 4 2 2
+ addq $A3,$CC,$CC # 3 3 2
+ cmpult $R4,$L4,$O4 # 4 2 3 1
+ addq $17,32,$17
+ addq $A4,$O4,$A4 # 4 3 1
+ addq $R4,$CC,$R4 # 4 2 3 1
+ cmpult $R4,$CC,$CC # 4 2 3 2
+ stq $R4,-8($16) # 4 2 4
+ addq $A4,$CC,$CC # 4 3 2
+ blt $18,$43
+
+ ldq $A1,0($17) # 1 1
+ ldq $R1,0($16) # 1 1
+
+ br $42
+
+ .align 4
+$45:
+ ldq $A1,0($17) # 4 1
+ ldq $R1,0($16) # 4 1
+ mulq $A1,$19,$L1 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $A1,$19,$A1 # 4 2
+ addq $R1,$L1,$R1 # 4 2 2
+ cmpult $R1,$L1,$O1 # 4 2 3 1
+ addq $A1,$O1,$A1 # 4 3 1
+ addq $R1,$CC,$R1 # 4 2 3 1
+ cmpult $R1,$CC,$CC # 4 2 3 2
+ addq $A1,$CC,$CC # 4 3 2
+ stq $R1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
+
+ .align 4
+$43:
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_add_words
+ .align 3
+ .globl bn_mul_words
+ .ent bn_mul_words
+bn_mul_words:
+bn_mul_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$CC
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $A1,$19,$L1 # 1 2 1 #####
+ ldq $A2,8($17) # 2 1
+ ldq $A3,16($17) # 3 1
+ umulh $A1,$19,$A1 # 1 2 #####
+ ldq $A4,24($17) # 4 1
+ mulq $A2,$19,$L2 # 2 2 1 #####
+ addq $L1,$CC,$L1 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $L1,$CC,$CC # 1 2 3 2
+ umulh $A2,$19,$A2 # 2 2 #####
+ addq $A1,$CC,$CC # 1 3 2
+ addq $17,32,$17
+ addq $L2,$CC,$L2 # 2 2 3 1
+ mulq $A3,$19,$L3 # 3 2 1 #####
+ cmpult $L2,$CC,$CC # 2 2 3 2
+ addq $A2,$CC,$CC # 2 3 2
+ addq $16,32,$16
+ umulh $A3,$19,$A3 # 3 2 #####
+ stq $L1,-32($16) # 1 2 4
+ mulq $A4,$19,$L4 # 4 2 1 #####
+ addq $L3,$CC,$L3 # 3 2 3 1
+ stq $L2,-24($16) # 2 2 4
+ cmpult $L3,$CC,$CC # 3 2 3 2
+ umulh $A4,$19,$A4 # 4 2 #####
+ addq $A3,$CC,$CC # 3 3 2
+ stq $L3,-16($16) # 3 2 4
+ addq $L4,$CC,$L4 # 4 2 3 1
+ cmpult $L4,$CC,$CC # 4 2 3 2
+
+ addq $A4,$CC,$CC # 4 3 2
+
+ stq $L4,-8($16) # 4 2 4
+
+ blt $18,$143
+
+ ldq $A1,0($17) # 1 1
+
+ br $142
+
+ .align 4
+$145:
+ ldq $A1,0($17) # 4 1
+ mulq $A1,$19,$L1 # 4 2 1
+ subq $18,1,$18
+ umulh $A1,$19,$A1 # 4 2
+ addq $L1,$CC,$L1 # 4 2 3 1
+ addq $16,8,$16
+ cmpult $L1,$CC,$CC # 4 2 3 2
+ addq $17,8,$17
+ addq $A1,$CC,$CC # 4 3 2
+ stq $L1,-8($16) # 4 2 4
+
+ bgt $18,$145
+ ret $31,($26),1 # else exit
+
+ .align 4
+$143:
+ addq $18,4,$18
+ bgt $18,$145 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_words
+ .align 3
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $18,4,$18
+ blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ .align 3
+$542:
+ mulq $A1,$A1,$L1 ######
+ ldq $A2,8($17) # 1 1
+ subq $18,4
+ umulh $A1,$A1,$R1 ######
+ ldq $A3,16($17) # 1 1
+ mulq $A2,$A2,$L2 ######
+ ldq $A4,24($17) # 1 1
+ stq $L1,0($16) # r[0]
+ umulh $A2,$A2,$R2 ######
+ stq $R1,8($16) # r[1]
+ mulq $A3,$A3,$L3 ######
+ stq $L2,16($16) # r[0]
+ umulh $A3,$A3,$R3 ######
+ stq $R2,24($16) # r[1]
+ mulq $A4,$A4,$L4 ######
+ stq $L3,32($16) # r[0]
+ umulh $A4,$A4,$R4 ######
+ stq $R3,40($16) # r[1]
+
+ addq $16,64,$16
+ addq $17,32,$17
+ stq $L4,-16($16) # r[0]
+ stq $R4,-8($16) # r[1]
+
+ blt $18,$543
+ ldq $A1,0($17) # 1 1
+ br $542
+
+$442:
+ ldq $A1,0($17) # a[0]
+ mulq $A1,$A1,$L1 # a[0]*w low part r2
+ addq $16,16,$16
+ addq $17,8,$17
+ subq $18,1,$18
+ umulh $A1,$A1,$R1 # a[0]*w high part r3
+ stq $L1,-16($16) # r[0]
+ stq $R1,-8($16) # r[1]
+
+ bgt $18,$442
+ ret $31,($26),1 # else exit
+
+ .align 4
+$543:
+ addq $18,4,$18
+ bgt $18,$442 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sqr_words
+
+ .align 3
+ .globl bn_add_words
+ .ent bn_add_words
+bn_add_words:
+bn_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$CC # carry = 0
+ blt $19,$900
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ .align 3
+$901:
+ addq $R1,$L1,$R1 # r=a+b;
+ ldq $L2,8($17) # a[1]
+ cmpult $R1,$L1,$O1 # did we overflow?
+ ldq $R2,8($18) # b[1]
+ addq $R1,$CC,$R1 # c+= overflow
+ ldq $L3,16($17) # a[2]
+ cmpult $R1,$CC,$CC # overflow?
+ ldq $R3,16($18) # b[2]
+ addq $CC,$O1,$CC
+ ldq $L4,24($17) # a[3]
+ addq $R2,$L2,$R2 # r=a+b;
+ ldq $R4,24($18) # b[3]
+ cmpult $R2,$L2,$O2 # did we overflow?
+ addq $R3,$L3,$R3 # r=a+b;
+ addq $R2,$CC,$R2 # c+= overflow
+ cmpult $R3,$L3,$O3 # did we overflow?
+ cmpult $R2,$CC,$CC # overflow?
+ addq $R4,$L4,$R4 # r=a+b;
+ addq $CC,$O2,$CC
+ cmpult $R4,$L4,$O4 # did we overflow?
+ addq $R3,$CC,$R3 # c+= overflow
+ stq $R1,0($16) # r[0]=c
+ cmpult $R3,$CC,$CC # overflow?
+ stq $R2,8($16) # r[1]=c
+ addq $CC,$O3,$CC
+ stq $R3,16($16) # r[2]=c
+ addq $R4,$CC,$R4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $R4,$CC,$CC # overflow?
+ addq $17,32,$17 # a++
+ addq $CC,$O4,$CC
+ stq $R4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$900
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ br $901
+ .align 4
+$945:
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ addq $R1,$L1,$R1 # r=a+b;
+ subq $19,1,$19 # loop--
+ addq $R1,$CC,$R1 # c+= overflow
+ addq $17,8,$17 # a++
+ cmpult $R1,$L1,$O1 # did we overflow?
+ cmpult $R1,$CC,$CC # overflow?
+ addq $18,8,$18 # b++
+ stq $R1,0($16) # r[0]=c
+ addq $CC,$O1,$CC
+ addq $16,8,$16 # r++
+
+ bgt $19,$945
+ ret $31,($26),1 # else exit
+
+$900:
+ addq $19,4,$19
+ bgt $19,$945 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_add_words
+
+ .align 3
+ .globl bn_sub_words
+ .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$CC # carry = 0
+ br $800
+ blt $19,$800
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ .align 3
+$801:
+ addq $R1,$L1,$R1 # r=a+b;
+ ldq $L2,8($17) # a[1]
+ cmpult $R1,$L1,$O1 # did we overflow?
+ ldq $R2,8($18) # b[1]
+ addq $R1,$CC,$R1 # c+= overflow
+ ldq $L3,16($17) # a[2]
+ cmpult $R1,$CC,$CC # overflow?
+ ldq $R3,16($18) # b[2]
+ addq $CC,$O1,$CC
+ ldq $L4,24($17) # a[3]
+ addq $R2,$L2,$R2 # r=a+b;
+ ldq $R4,24($18) # b[3]
+ cmpult $R2,$L2,$O2 # did we overflow?
+ addq $R3,$L3,$R3 # r=a+b;
+ addq $R2,$CC,$R2 # c+= overflow
+ cmpult $R3,$L3,$O3 # did we overflow?
+ cmpult $R2,$CC,$CC # overflow?
+ addq $R4,$L4,$R4 # r=a+b;
+ addq $CC,$O2,$CC
+ cmpult $R4,$L4,$O4 # did we overflow?
+ addq $R3,$CC,$R3 # c+= overflow
+ stq $R1,0($16) # r[0]=c
+ cmpult $R3,$CC,$CC # overflow?
+ stq $R2,8($16) # r[1]=c
+ addq $CC,$O3,$CC
+ stq $R3,16($16) # r[2]=c
+ addq $R4,$CC,$R4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $R4,$CC,$CC # overflow?
+ addq $17,32,$17 # a++
+ addq $CC,$O4,$CC
+ stq $R4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$800
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ br $801
+ .align 4
+$845:
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ cmpult $L1,$R1,$O1 # will we borrow?
+ subq $L1,$R1,$R1 # r=a-b;
+ subq $19,1,$19 # loop--
+ cmpult $R1,$CC,$O2 # will we borrow?
+ subq $R1,$CC,$R1 # c+= overflow
+ addq $17,8,$17 # a++
+ addq $18,8,$18 # b++
+ stq $R1,0($16) # r[0]=c
+ addq $O2,$O1,$CC
+ addq $16,8,$16 # r++
+
+ bgt $19,$845
+ ret $31,($26),1 # else exit
+
+$800:
+ addq $19,4,$19
+ bgt $19,$845 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sub_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+ .align 3
+ .globl bn_div_words
+ .ent bn_div_words
+bn_div_words:
+ ldgp $29,0($27)
+bn_div_words..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$119
+ lda $0,-1
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$127:
+ srl $10,32,$4
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129
+ subq $27,1,$27
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9
+ subq $27,1,$27
+$134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0
+$136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div_words
+EOF
+ return($data);
+ }
+
OpenPOWER on IntegriCloud