diff options
Diffstat (limited to 'crypto/bn')
30 files changed, 0 insertions, 9237 deletions
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s deleted file mode 100644 index 555ff0b..0000000 --- a/crypto/bn/asm/alpha.s +++ /dev/null @@ -1,3199 +0,0 @@ - # DEC Alpha assember - # The bn_div_words is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div_words. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - .align 3 -$42: - mulq $20,$19,$5 # 1 2 1 ###### - ldq $21,8($17) # 2 1 - ldq $2,8($16) # 2 1 - umulh $20,$19,$20 # 1 2 ###### - ldq $27,16($17) # 3 1 - ldq $3,16($16) # 3 1 - mulq $21,$19,$6 # 2 2 1 ###### - ldq $28,24($17) # 4 1 - addq $1,$5,$1 # 1 2 2 - ldq $4,24($16) # 4 1 - umulh $21,$19,$21 # 2 2 ###### - cmpult $1,$5,$22 # 1 2 3 1 - addq $20,$22,$20 # 1 3 1 - addq $1,$0,$1 # 1 2 3 1 - mulq $27,$19,$7 # 3 2 1 ###### - cmpult $1,$0,$0 # 1 2 3 2 - addq $2,$6,$2 # 2 2 2 - addq $20,$0,$0 # 1 3 2 - cmpult $2,$6,$23 # 2 2 3 1 - addq $21,$23,$21 # 2 3 1 - umulh $27,$19,$27 # 3 2 ###### - addq $2,$0,$2 # 2 2 3 1 - cmpult $2,$0,$0 # 2 2 3 2 - subq $18,4,$18 - mulq $28,$19,$8 # 4 2 1 ###### - addq $21,$0,$0 # 2 3 2 - addq $3,$7,$3 # 3 2 2 - addq $16,32,$16 - cmpult $3,$7,$24 # 3 2 3 1 - stq $1,-32($16) # 1 2 4 - umulh $28,$19,$28 # 4 2 ###### - addq $27,$24,$27 # 3 3 1 - addq $3,$0,$3 # 3 2 3 1 - stq $2,-24($16) # 2 2 4 - cmpult $3,$0,$0 # 3 2 3 2 - stq $3,-16($16) # 3 2 4 - addq $4,$8,$4 # 4 2 2 - addq $27,$0,$0 # 3 3 2 - cmpult $4,$8,$25 # 4 2 3 1 - addq $17,32,$17 - addq $28,$25,$28 # 4 3 1 - addq $4,$0,$4 # 4 2 3 1 - cmpult $4,$0,$0 # 4 2 3 2 - stq $4,-8($16) # 4 2 4 - addq $28,$0,$0 # 4 3 2 - blt $18,$43 - - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $20,0($17) # 4 1 - ldq $1,0($16) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $20,$19,$20 # 4 2 - addq $1,$5,$1 # 4 2 2 - cmpult $1,$5,$22 # 4 2 3 1 - addq $20,$22,$20 # 4 3 1 - addq $1,$0,$1 # 4 2 3 1 - cmpult $1,$0,$0 # 4 2 3 2 - addq $20,$0,$0 # 4 3 2 - stq $1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$142: - - mulq $20,$19,$5 # 1 2 1 ##### - ldq $21,8($17) # 2 1 - ldq $27,16($17) # 3 1 - umulh $20,$19,$20 # 1 2 ##### - ldq $28,24($17) # 4 1 - mulq $21,$19,$6 # 2 2 1 ##### - addq $5,$0,$5 # 1 2 3 1 - subq $18,4,$18 - cmpult $5,$0,$0 # 1 2 3 2 - umulh $21,$19,$21 # 2 2 ##### - addq $20,$0,$0 # 1 3 2 - addq $17,32,$17 - addq $6,$0,$6 # 2 2 3 1 - mulq $27,$19,$7 # 3 2 1 ##### - cmpult $6,$0,$0 # 2 2 3 2 - addq $21,$0,$0 # 2 3 2 - addq $16,32,$16 - umulh $27,$19,$27 # 3 2 ##### - stq $5,-32($16) # 1 2 4 - mulq $28,$19,$8 # 4 2 1 ##### - addq $7,$0,$7 # 3 2 3 1 - stq $6,-24($16) # 2 2 4 - cmpult $7,$0,$0 # 3 2 3 2 - umulh $28,$19,$28 # 4 2 ##### - addq $27,$0,$0 # 3 3 2 - stq $7,-16($16) # 3 2 4 - addq $8,$0,$8 # 4 2 3 1 - cmpult $8,$0,$0 # 4 2 3 2 - - addq $28,$0,$0 # 4 3 2 - - stq $8,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $20,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $20,0($17) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - umulh $20,$19,$20 # 4 2 - addq $5,$0,$5 # 4 2 3 1 - addq $16,8,$16 - cmpult $5,$0,$0 # 4 2 3 2 - addq $17,8,$17 - addq $20,$0,$0 # 4 3 2 - stq $5,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$542: - mulq $20,$20,$5 ###### - ldq $21,8($17) # 1 1 - subq $18,4 - umulh $20,$20,$1 ###### - ldq $27,16($17) # 1 1 - mulq $21,$21,$6 ###### - ldq $28,24($17) # 1 1 - stq $5,0($16) # r[0] - umulh $21,$21,$2 ###### - stq $1,8($16) # r[1] - mulq $27,$27,$7 ###### - stq $6,16($16) # r[0] - umulh $27,$27,$3 ###### - stq $2,24($16) # r[1] - mulq $28,$28,$8 ###### - stq $7,32($16) # r[0] - umulh $28,$28,$4 ###### - stq $3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $8,-16($16) # r[0] - stq $4,-8($16) # r[1] - - blt $18,$543 - ldq $20,0($17) # 1 1 - br $542 - -$442: - ldq $20,0($17) # a[0] - mulq $20,$20,$5 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $20,$20,$1 # a[0]*w high part r3 - stq $5,-16($16) # r[0] - stq $1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$0 # carry = 0 - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - .align 3 -$901: - addq $1,$5,$1 # r=a+b; - ldq $6,8($17) # a[1] - cmpult $1,$5,$22 # did we overflow? - ldq $2,8($18) # b[1] - addq $1,$0,$1 # c+= overflow - ldq $7,16($17) # a[2] - cmpult $1,$0,$0 # overflow? - ldq $3,16($18) # b[2] - addq $0,$22,$0 - ldq $8,24($17) # a[3] - addq $2,$6,$2 # r=a+b; - ldq $4,24($18) # b[3] - cmpult $2,$6,$23 # did we overflow? - addq $3,$7,$3 # r=a+b; - addq $2,$0,$2 # c+= overflow - cmpult $3,$7,$24 # did we overflow? - cmpult $2,$0,$0 # overflow? - addq $4,$8,$4 # r=a+b; - addq $0,$23,$0 - cmpult $4,$8,$25 # did we overflow? - addq $3,$0,$3 # c+= overflow - stq $1,0($16) # r[0]=c - cmpult $3,$0,$0 # overflow? - stq $2,8($16) # r[1]=c - addq $0,$24,$0 - stq $3,16($16) # r[2]=c - addq $4,$0,$4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $4,$0,$0 # overflow? - addq $17,32,$17 # a++ - addq $0,$25,$0 - stq $4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - addq $1,$5,$1 # r=a+b; - subq $19,1,$19 # loop-- - addq $1,$0,$1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $1,$5,$22 # did we overflow? - cmpult $1,$0,$0 # overflow? - addq $18,8,$18 # b++ - stq $1,0($16) # r[0]=c - addq $0,$22,$0 - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words: - ldgp $29,0($27) -bn_div_words..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words - - .set noat - .text - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19, 4, $19 - bis $31, $31, $0 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) -$101: - ldq $3, 8($17) - cmpult $1, $2, $4 - ldq $5, 8($18) - subq $1, $2, $1 - ldq $6, 16($17) - cmpult $1, $0, $2 - ldq $7, 16($18) - subq $1, $0, $23 - ldq $8, 24($17) - addq $2, $4, $0 - cmpult $3, $5, $24 - subq $3, $5, $3 - ldq $22, 24($18) - cmpult $3, $0, $5 - subq $3, $0, $25 - addq $5, $24, $0 - cmpult $6, $7, $27 - subq $6, $7, $6 - stq $23, 0($16) - cmpult $6, $0, $7 - subq $6, $0, $28 - addq $7, $27, $0 - cmpult $8, $22, $21 - subq $8, $22, $8 - stq $25, 8($16) - cmpult $8, $0, $22 - subq $8, $0, $20 - addq $22, $21, $0 - stq $28, 16($16) - subq $19, 4, $19 - stq $20, 24($16) - addq $17, 32, $17 - addq $18, 32, $18 - addq $16, 32, $16 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) - br $101 -$102: - ldq $1, 0($17) - ldq $2, 0($18) - cmpult $1, $2, $27 - subq $1, $2, $1 - cmpult $1, $0, $2 - subq $1, $0, $1 - stq $1, 0($16) - addq $2, $27, $0 - addq $17, 8, $17 - addq $18, 8, $18 - addq $16, 8, $16 - subq $19, 1, $19 - bgt $19, $102 - ret $31,($26),1 -$100: - addq $19, 4, $19 - bgt $19, $102 -$103: - ret $31,($26),1 - .end bn_sub_words - .text - .align 3 - .globl bn_mul_comba4 - .ent bn_mul_comba4 -bn_mul_comba4: -bn_mul_comba4..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 0($18) - ldq $2, 8($17) - ldq $3, 8($18) - ldq $4, 16($17) - ldq $5, 16($18) - ldq $6, 24($17) - ldq $7, 24($18) - bis $31, $31, $23 - mulq $0, $1, $8 - umulh $0, $1, $22 - stq $8, 0($16) - bis $31, $31, $8 - mulq $0, $3, $24 - umulh $0, $3, $25 - addq $22, $24, $22 - cmpult $22, $24, $27 - addq $27, $25, $25 - addq $23, $25, $23 - cmpult $23, $25, $28 - addq $8, $28, $8 - mulq $2, $1, $21 - umulh $2, $1, $20 - addq $22, $21, $22 - cmpult $22, $21, $19 - addq $19, $20, $20 - addq $23, $20, $23 - cmpult $23, $20, $17 - addq $8, $17, $8 - stq $22, 8($16) - bis $31, $31, $22 - mulq $2, $3, $18 - umulh $2, $3, $24 - addq $23, $18, $23 - cmpult $23, $18, $27 - addq $27, $24, $24 - addq $8, $24, $8 - cmpult $8, $24, $25 - addq $22, $25, $22 - mulq $0, $5, $28 - umulh $0, $5, $21 - addq $23, $28, $23 - cmpult $23, $28, $19 - addq $19, $21, $21 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $22, $20, $22 - mulq $4, $1, $17 - umulh $4, $1, $18 - addq $23, $17, $23 - cmpult $23, $17, $27 - addq $27, $18, $18 - addq $8, $18, $8 - cmpult $8, $18, $24 - addq $22, $24, $22 - stq $23, 16($16) - bis $31, $31, $23 - mulq $0, $7, $25 - umulh $0, $7, $28 - addq $8, $25, $8 - cmpult $8, $25, $19 - addq $19, $28, $28 - addq $22, $28, $22 - cmpult $22, $28, $21 - addq $23, $21, $23 - mulq $2, $5, $20 - umulh $2, $5, $17 - addq $8, $20, $8 - cmpult $8, $20, $27 - addq $27, $17, $17 - addq $22, $17, $22 - cmpult $22, $17, $18 - addq $23, $18, $23 - mulq $4, $3, $24 - umulh $4, $3, $25 - addq $8, $24, $8 - cmpult $8, $24, $19 - addq $19, $25, $25 - addq $22, $25, $22 - cmpult $22, $25, $28 - addq $23, $28, $23 - mulq $6, $1, $21 - umulh $6, $1, $0 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $20, $0, $0 - addq $22, $0, $22 - cmpult $22, $0, $27 - addq $23, $27, $23 - stq $8, 24($16) - bis $31, $31, $8 - mulq $2, $7, $17 - umulh $2, $7, $18 - addq $22, $17, $22 - cmpult $22, $17, $24 - addq $24, $18, $18 - addq $23, $18, $23 - cmpult $23, $18, $19 - addq $8, $19, $8 - mulq $4, $5, $25 - umulh $4, $5, $28 - addq $22, $25, $22 - cmpult $22, $25, $21 - addq $21, $28, $28 - addq $23, $28, $23 - cmpult $23, $28, $20 - addq $8, $20, $8 - mulq $6, $3, $0 - umulh $6, $3, $27 - addq $22, $0, $22 - cmpult $22, $0, $1 - addq $1, $27, $27 - addq $23, $27, $23 - cmpult $23, $27, $17 - addq $8, $17, $8 - stq $22, 32($16) - bis $31, $31, $22 - mulq $4, $7, $24 - umulh $4, $7, $18 - addq $23, $24, $23 - cmpult $23, $24, $19 - addq $19, $18, $18 - addq $8, $18, $8 - cmpult $8, $18, $2 - addq $22, $2, $22 - mulq $6, $5, $25 - umulh $6, $5, $21 - addq $23, $25, $23 - cmpult $23, $25, $28 - addq $28, $21, $21 - addq $8, $21, $8 - cmpult $8, $21, $20 - addq $22, $20, $22 - stq $23, 40($16) - bis $31, $31, $23 - mulq $6, $7, $0 - umulh $6, $7, $1 - addq $8, $0, $8 - cmpult $8, $0, $27 - addq $27, $1, $1 - addq $22, $1, $22 - cmpult $22, $1, $17 - addq $23, $17, $23 - stq $8, 48($16) - stq $22, 56($16) - ret $31,($26),1 - .end bn_mul_comba4 - .text - .align 3 - .globl bn_mul_comba8 - .ent bn_mul_comba8 -bn_mul_comba8: -bn_mul_comba8..ng: - .frame $30,0,$26,0 - .prologue 0 - ldq $1, 0($17) - ldq $2, 0($18) - zapnot $1, 15, $7 - srl $2, 32, $8 - mulq $8, $7, $22 - srl $1, 32, $6 - zapnot $2, 15, $5 - mulq $5, $6, $4 - mulq $7, $5, $24 - addq $22, $4, $22 - cmpult $22, $4, $1 - mulq $6, $8, $3 - beq $1, $173 - bis $31, 1, $1 - sll $1, 32, $1 - addq $3, $1, $3 -$173: - sll $22, 32, $4 - addq $24, $4, $24 - stq $24, 0($16) - ldq $2, 0($17) - ldq $1, 8($18) - zapnot $2, 15, $7 - srl $1, 32, $8 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $2, 32, $6 - mulq $5, $6, $23 - mulq $6, $8, $6 - srl $22, 32, $1 - cmpult $24, $4, $2 - addq $3, $1, $3 - addq $2, $3, $22 - addq $25, $23, $25 - cmpult $25, $23, $1 - bis $31, 1, $2 - beq $1, $177 - sll $2, 32, $1 - addq $6, $1, $6 -$177: - sll $25, 32, $23 - ldq $1, 0($18) - addq $0, $23, $0 - bis $0, $0, $7 - ldq $3, 8($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $4 - zapnot $3, 15, $7 - mulq $8, $7, $28 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $25, 32, $1 - cmpult $0, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $4, $6, $24 - srl $3, 32, $6 - mulq $5, $6, $2 - mulq $6, $8, $6 - addq $28, $2, $28 - cmpult $28, $2, $1 - bis $31, 1, $2 - beq $1, $181 - sll $2, 32, $1 - addq $6, $1, $6 -$181: - sll $28, 32, $2 - addq $21, $2, $21 - bis $21, $21, $7 - addq $22, $7, $22 - stq $22, 8($16) - ldq $3, 16($17) - ldq $1, 0($18) - cmpult $22, $7, $4 - zapnot $3, 15, $7 - srl $1, 32, $8 - mulq $8, $7, $22 - zapnot $1, 15, $5 - mulq $7, $5, $20 - srl $28, 32, $1 - cmpult $21, $2, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $4, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $3, 32, $6 - mulq $5, $6, $2 - mulq $6, $8, $6 - addq $22, $2, $22 - cmpult $22, $2, $1 - bis $31, 1, $2 - beq $1, $185 - sll $2, 32, $1 - addq $6, $1, $6 -$185: - sll $22, 32, $2 - ldq $1, 8($18) - addq $20, $2, $20 - bis $20, $20, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $22, 32, $1 - cmpult $20, $2, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $189 - sll $21, 32, $1 - addq $6, $1, $6 -$189: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 0($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $193 - sll $21, 32, $1 - addq $6, $1, $6 -$193: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 16($16) - ldq $4, 0($17) - ldq $5, 24($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $197 - sll $21, 32, $1 - addq $6, $1, $6 -$197: - sll $0, 32, $24 - ldq $1, 16($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $201 - sll $20, 32, $1 - addq $6, $1, $6 -$201: - sll $25, 32, $5 - ldq $2, 8($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $205 - sll $20, 32, $1 - addq $6, $1, $6 -$205: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $209 - sll $20, 32, $1 - addq $6, $1, $6 -$209: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 24($16) - ldq $4, 32($17) - ldq $5, 0($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $213 - sll $20, 32, $1 - addq $6, $1, $6 -$213: - sll $28, 32, $23 - ldq $1, 8($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $217 - sll $21, 32, $1 - addq $6, $1, $6 -$217: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 16($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $221 - sll $21, 32, $1 - addq $6, $1, $6 -$221: - sll $28, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $225 - sll $21, 32, $1 - addq $6, $1, $6 -$225: - sll $0, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 0($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $229 - sll $21, 32, $1 - addq $6, $1, $6 -$229: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 32($16) - ldq $4, 0($17) - ldq $5, 40($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $22, $0 - cmpult $0, $22, $1 - mulq $6, $8, $6 - beq $1, $233 - sll $21, 32, $1 - addq $6, $1, $6 -$233: - sll $0, 32, $22 - ldq $1, 32($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $237 - sll $20, 32, $1 - addq $6, $1, $6 -$237: - sll $25, 32, $5 - ldq $2, 24($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $241 - sll $20, 32, $1 - addq $6, $1, $6 -$241: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $245 - sll $20, 32, $1 - addq $6, $1, $6 -$245: - sll $0, 32, $25 - ldq $2, 8($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $249 - sll $20, 32, $1 - addq $6, $1, $6 -$249: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $253 - sll $20, 32, $1 - addq $6, $1, $6 -$253: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 40($16) - ldq $4, 48($17) - ldq $5, 0($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $24, $28 - cmpult $28, $24, $1 - mulq $6, $8, $6 - beq $1, $257 - sll $20, 32, $1 - addq $6, $1, $6 -$257: - sll $28, 32, $24 - ldq $1, 8($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $261 - sll $21, 32, $1 - addq $6, $1, $6 -$261: - sll $25, 32, $5 - ldq $2, 16($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 32($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $265 - sll $21, 32, $1 - addq $6, $1, $6 -$265: - sll $28, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $269 - sll $21, 32, $1 - addq $6, $1, $6 -$269: - sll $0, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 16($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $273 - sll $21, 32, $1 - addq $6, $1, $6 -$273: - sll $28, 32, $25 - ldq $2, 40($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $277 - sll $21, 32, $1 - addq $6, $1, $6 -$277: - sll $0, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 0($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $281 - sll $21, 32, $1 - addq $6, $1, $6 -$281: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 48($16) - ldq $4, 0($17) - ldq $5, 56($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $23, $0 - cmpult $0, $23, $1 - mulq $6, $8, $6 - beq $1, $285 - sll $21, 32, $1 - addq $6, $1, $6 -$285: - sll $0, 32, $23 - ldq $1, 48($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 8($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $289 - sll $20, 32, $1 - addq $6, $1, $6 -$289: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 16($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $293 - sll $20, 32, $1 - addq $6, $1, $6 -$293: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $297 - sll $20, 32, $1 - addq $6, $1, $6 -$297: - sll $0, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $301 - sll $20, 32, $1 - addq $6, $1, $6 -$301: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $305 - sll $20, 32, $1 - addq $6, $1, $6 -$305: - sll $0, 32, $25 - ldq $2, 8($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $309 - sll $20, 32, $1 - addq $6, $1, $6 -$309: - sll $28, 32, $25 - ldq $2, 0($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $313 - sll $20, 32, $1 - addq $6, $1, $6 -$313: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 56($16) - ldq $4, 56($17) - ldq $5, 8($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $22, $28 - cmpult $28, $22, $1 - mulq $6, $8, $6 - beq $1, $317 - sll $20, 32, $1 - addq $6, $1, $6 -$317: - sll $28, 32, $22 - ldq $1, 16($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 48($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $321 - sll $21, 32, $1 - addq $6, $1, $6 -$321: - sll $25, 32, $5 - ldq $2, 24($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $325 - sll $21, 32, $1 - addq $6, $1, $6 -$325: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $329 - sll $21, 32, $1 - addq $6, $1, $6 -$329: - sll $0, 32, $25 - ldq $2, 40($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $333 - sll $21, 32, $1 - addq $6, $1, $6 -$333: - sll $28, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 16($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $337 - sll $21, 32, $1 - addq $6, $1, $6 -$337: - sll $0, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 8($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $341 - sll $21, 32, $1 - addq $6, $1, $6 -$341: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 64($16) - ldq $4, 16($17) - ldq $5, 56($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $345 - sll $21, 32, $1 - addq $6, $1, $6 -$345: - sll $0, 32, $24 - ldq $1, 48($18) - addq $2, $24, $2 - bis $2, $2, $7 - ldq $4, 24($17) - addq $23, $7, $23 - srl $1, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $24, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $349 - sll $20, 32, $1 - addq $6, $1, $6 -$349: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 32($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $353 - sll $20, 32, $1 - addq $6, $1, $6 -$353: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $357 - sll $20, 32, $1 - addq $6, $1, $6 -$357: - sll $0, 32, $25 - ldq $2, 24($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $361 - sll $20, 32, $1 - addq $6, $1, $6 -$361: - sll $28, 32, $25 - ldq $2, 16($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $365 - sll $20, 32, $1 - addq $6, $1, $6 -$365: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 72($16) - ldq $4, 56($17) - ldq $5, 24($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $369 - sll $20, 32, $1 - addq $6, $1, $6 -$369: - sll $28, 32, $23 - ldq $1, 32($18) - addq $2, $23, $2 - bis $2, $2, $7 - ldq $4, 48($17) - addq $22, $7, $22 - srl $1, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $0 - srl $28, 32, $1 - cmpult $2, $23, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $21 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $373 - sll $21, 32, $1 - addq $6, $1, $6 -$373: - sll $25, 32, $5 - ldq $2, 40($18) - addq $0, $5, $0 - bis $0, $0, $7 - ldq $4, 40($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $0, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $377 - sll $21, 32, $1 - addq $6, $1, $6 -$377: - sll $28, 32, $25 - ldq $2, 48($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 32($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $23, $23 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $381 - sll $21, 32, $1 - addq $6, $1, $6 -$381: - sll $0, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 24($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $385 - sll $21, 32, $1 - addq $6, $1, $6 -$385: - sll $28, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 80($16) - ldq $4, 32($17) - ldq $5, 56($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $28, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $0, $22, $0 - cmpult $0, $22, $1 - mulq $6, $8, $6 - beq $1, $389 - sll $21, 32, $1 - addq $6, $1, $6 -$389: - sll $0, 32, $22 - ldq $1, 48($18) - addq $2, $22, $2 - bis $2, $2, $7 - ldq $4, 40($17) - addq $24, $7, $24 - srl $1, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $25 - zapnot $1, 15, $5 - mulq $7, $5, $21 - srl $0, 32, $1 - cmpult $2, $22, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $22 - srl $4, 32, $6 - mulq $5, $6, $5 - bis $31, 1, $20 - addq $25, $5, $25 - cmpult $25, $5, $1 - mulq $6, $8, $6 - beq $1, $393 - sll $20, 32, $1 - addq $6, $1, $6 -$393: - sll $25, 32, $5 - ldq $2, 40($18) - addq $21, $5, $21 - bis $21, $21, $7 - ldq $4, 48($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $25, 32, $1 - addq $6, $1, $6 - cmpult $21, $5, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $397 - sll $20, 32, $1 - addq $6, $1, $6 -$397: - sll $28, 32, $25 - ldq $2, 32($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $24, $7, $24 - srl $2, 32, $8 - cmpult $24, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $21 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $22, $22 - addq $21, $25, $21 - cmpult $21, $25, $1 - mulq $6, $8, $6 - beq $1, $401 - sll $20, 32, $1 - addq $6, $1, $6 -$401: - sll $21, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $24, $7, $24 - stq $24, 88($16) - ldq $4, 56($17) - ldq $5, 40($18) - cmpult $24, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $0 - srl $21, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $23, $6, $23 - cmpult $23, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $24 - mulq $7, $5, $5 - addq $1, $22, $22 - addq $0, $24, $0 - cmpult $0, $24, $1 - mulq $6, $8, $6 - beq $1, $405 - sll $20, 32, $1 - addq $6, $1, $6 -$405: - sll $0, 32, $24 - ldq $2, 48($18) - addq $5, $24, $5 - bis $5, $5, $7 - ldq $4, 48($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $28 - srl $0, 32, $1 - addq $6, $1, $6 - cmpult $5, $24, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $24 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $5 - addq $28, $25, $28 - cmpult $28, $25, $1 - mulq $6, $8, $6 - beq $1, $409 - sll $20, 32, $1 - addq $6, $1, $6 -$409: - sll $28, 32, $25 - ldq $2, 56($18) - addq $5, $25, $5 - bis $5, $5, $7 - ldq $4, 40($17) - addq $23, $7, $23 - srl $2, 32, $8 - cmpult $23, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $25, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $1, $24, $24 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $413 - sll $20, 32, $1 - addq $6, $1, $6 -$413: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $23, $7, $23 - stq $23, 96($16) - ldq $4, 48($17) - ldq $5, 56($18) - cmpult $23, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $22, $6, $22 - cmpult $22, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $23 - mulq $7, $5, $5 - addq $1, $24, $24 - addq $28, $23, $28 - cmpult $28, $23, $1 - mulq $6, $8, $6 - beq $1, $417 - sll $20, 32, $1 - addq $6, $1, $6 -$417: - sll $28, 32, $23 - ldq $2, 48($18) - addq $5, $23, $5 - bis $5, $5, $7 - ldq $4, 56($17) - addq $22, $7, $22 - srl $2, 32, $8 - cmpult $22, $7, $3 - zapnot $4, 15, $7 - mulq $8, $7, $0 - srl $28, 32, $1 - addq $6, $1, $6 - cmpult $5, $23, $1 - zapnot $2, 15, $5 - addq $1, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $23 - srl $4, 32, $6 - mulq $5, $6, $25 - mulq $7, $5, $2 - addq $0, $25, $0 - cmpult $0, $25, $1 - mulq $6, $8, $6 - beq $1, $421 - sll $20, 32, $1 - addq $6, $1, $6 -$421: - sll $0, 32, $25 - addq $2, $25, $2 - bis $2, $2, $7 - addq $22, $7, $22 - stq $22, 104($16) - ldq $4, 56($17) - ldq $5, 56($18) - cmpult $22, $7, $3 - zapnot $4, 15, $7 - srl $5, 32, $8 - mulq $8, $7, $28 - srl $0, 32, $1 - cmpult $2, $25, $2 - addq $6, $1, $6 - addq $2, $6, $6 - addq $3, $6, $6 - addq $24, $6, $24 - cmpult $24, $6, $1 - srl $4, 32, $6 - zapnot $5, 15, $5 - mulq $5, $6, $22 - mulq $7, $5, $2 - addq $1, $23, $23 - addq $28, $22, $28 - cmpult $28, $22, $1 - mulq $6, $8, $3 - beq $1, $425 - sll $20, 32, $1 - addq $3, $1, $3 -$425: - sll $28, 32, $22 - srl $28, 32, $1 - addq $2, $22, $2 - addq $3, $1, $3 - bis $2, $2, $7 - addq $24, $7, $24 - cmpult $7, $22, $1 - cmpult $24, $7, $2 - addq $1, $3, $6 - addq $2, $6, $6 - stq $24, 112($16) - addq $23, $6, $23 - stq $23, 120($16) - ret $31, ($26), 1 - .end bn_mul_comba8 - .text - .align 3 - .globl bn_sqr_comba4 - .ent bn_sqr_comba4 -bn_sqr_comba4: -bn_sqr_comba4..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 8($17) - ldq $2, 16($17) - ldq $3, 24($17) - bis $31, $31, $6 - mulq $0, $0, $4 - umulh $0, $0, $5 - stq $4, 0($16) - bis $31, $31, $4 - mulq $0, $1, $7 - umulh $0, $1, $8 - cmplt $7, $31, $22 - cmplt $8, $31, $23 - addq $7, $7, $7 - addq $8, $8, $8 - addq $8, $22, $8 - addq $4, $23, $4 - addq $5, $7, $5 - addq $6, $8, $6 - cmpult $5, $7, $24 - cmpult $6, $8, $25 - addq $6, $24, $6 - addq $4, $25, $4 - stq $5, 8($16) - bis $31, $31, $5 - mulq $1, $1, $27 - umulh $1, $1, $28 - addq $6, $27, $6 - addq $4, $28, $4 - cmpult $6, $27, $21 - cmpult $4, $28, $20 - addq $4, $21, $4 - addq $5, $20, $5 - mulq $2, $0, $19 - umulh $2, $0, $18 - cmplt $19, $31, $17 - cmplt $18, $31, $22 - addq $19, $19, $19 - addq $18, $18, $18 - addq $18, $17, $18 - addq $5, $22, $5 - addq $6, $19, $6 - addq $4, $18, $4 - cmpult $6, $19, $23 - cmpult $4, $18, $7 - addq $4, $23, $4 - addq $5, $7, $5 - stq $6, 16($16) - bis $31, $31, $6 - mulq $3, $0, $8 - umulh $3, $0, $24 - cmplt $8, $31, $25 - cmplt $24, $31, $27 - addq $8, $8, $8 - addq $24, $24, $24 - addq $24, $25, $24 - addq $6, $27, $6 - addq $4, $8, $4 - addq $5, $24, $5 - cmpult $4, $8, $28 - cmpult $5, $24, $21 - addq $5, $28, $5 - addq $6, $21, $6 - mulq $2, $1, $20 - umulh $2, $1, $17 - cmplt $20, $31, $22 - cmplt $17, $31, $19 - addq $20, $20, $20 - addq $17, $17, $17 - addq $17, $22, $17 - addq $6, $19, $6 - addq $4, $20, $4 - addq $5, $17, $5 - cmpult $4, $20, $18 - cmpult $5, $17, $23 - addq $5, $18, $5 - addq $6, $23, $6 - stq $4, 24($16) - bis $31, $31, $4 - mulq $2, $2, $7 - umulh $2, $2, $25 - addq $5, $7, $5 - addq $6, $25, $6 - cmpult $5, $7, $27 - cmpult $6, $25, $8 - addq $6, $27, $6 - addq $4, $8, $4 - mulq $3, $1, $24 - umulh $3, $1, $28 - cmplt $24, $31, $21 - cmplt $28, $31, $22 - addq $24, $24, $24 - addq $28, $28, $28 - addq $28, $21, $28 - addq $4, $22, $4 - addq $5, $24, $5 - addq $6, $28, $6 - cmpult $5, $24, $19 - cmpult $6, $28, $20 - addq $6, $19, $6 - addq $4, $20, $4 - stq $5, 32($16) - bis $31, $31, $5 - mulq $3, $2, $17 - umulh $3, $2, $18 - cmplt $17, $31, $23 - cmplt $18, $31, $7 - addq $17, $17, $17 - addq $18, $18, $18 - addq $18, $23, $18 - addq $5, $7, $5 - addq $6, $17, $6 - addq $4, $18, $4 - cmpult $6, $17, $25 - cmpult $4, $18, $27 - addq $4, $25, $4 - addq $5, $27, $5 - stq $6, 40($16) - bis $31, $31, $6 - mulq $3, $3, $8 - umulh $3, $3, $21 - addq $4, $8, $4 - addq $5, $21, $5 - cmpult $4, $8, $22 - cmpult $5, $21, $24 - addq $5, $22, $5 - addq $6, $24, $6 - stq $4, 48($16) - stq $5, 56($16) - ret $31,($26),1 - .end bn_sqr_comba4 - .text - .align 3 - .globl bn_sqr_comba8 - .ent bn_sqr_comba8 -bn_sqr_comba8: -bn_sqr_comba8..ng: - .frame $30,0,$26,0 - .prologue 0 - - ldq $0, 0($17) - ldq $1, 8($17) - ldq $2, 16($17) - ldq $3, 24($17) - ldq $4, 32($17) - ldq $5, 40($17) - ldq $6, 48($17) - ldq $7, 56($17) - bis $31, $31, $23 - mulq $0, $0, $8 - umulh $0, $0, $22 - stq $8, 0($16) - bis $31, $31, $8 - mulq $1, $0, $24 - umulh $1, $0, $25 - cmplt $24, $31, $27 - cmplt $25, $31, $28 - addq $24, $24, $24 - addq $25, $25, $25 - addq $25, $27, $25 - addq $8, $28, $8 - addq $22, $24, $22 - addq $23, $25, $23 - cmpult $22, $24, $21 - cmpult $23, $25, $20 - addq $23, $21, $23 - addq $8, $20, $8 - stq $22, 8($16) - bis $31, $31, $22 - mulq $1, $1, $19 - umulh $1, $1, $18 - addq $23, $19, $23 - addq $8, $18, $8 - cmpult $23, $19, $17 - cmpult $8, $18, $27 - addq $8, $17, $8 - addq $22, $27, $22 - mulq $2, $0, $28 - umulh $2, $0, $24 - cmplt $28, $31, $25 - cmplt $24, $31, $21 - addq $28, $28, $28 - addq $24, $24, $24 - addq $24, $25, $24 - addq $22, $21, $22 - addq $23, $28, $23 - addq $8, $24, $8 - cmpult $23, $28, $20 - cmpult $8, $24, $19 - addq $8, $20, $8 - addq $22, $19, $22 - stq $23, 16($16) - bis $31, $31, $23 - mulq $2, $1, $18 - umulh $2, $1, $17 - cmplt $18, $31, $27 - cmplt $17, $31, $25 - addq $18, $18, $18 - addq $17, $17, $17 - addq $17, $27, $17 - addq $23, $25, $23 - addq $8, $18, $8 - addq $22, $17, $22 - cmpult $8, $18, $21 - cmpult $22, $17, $28 - addq $22, $21, $22 - addq $23, $28, $23 - mulq $3, $0, $24 - umulh $3, $0, $20 - cmplt $24, $31, $19 - cmplt $20, $31, $27 - addq $24, $24, $24 - addq $20, $20, $20 - addq $20, $19, $20 - addq $23, $27, $23 - addq $8, $24, $8 - addq $22, $20, $22 - cmpult $8, $24, $25 - cmpult $22, $20, $18 - addq $22, $25, $22 - addq $23, $18, $23 - stq $8, 24($16) - bis $31, $31, $8 - mulq $2, $2, $17 - umulh $2, $2, $21 - addq $22, $17, $22 - addq $23, $21, $23 - cmpult $22, $17, $28 - cmpult $23, $21, $19 - addq $23, $28, $23 - addq $8, $19, $8 - mulq $3, $1, $27 - umulh $3, $1, $24 - cmplt $27, $31, $20 - cmplt $24, $31, $25 - addq $27, $27, $27 - addq $24, $24, $24 - addq $24, $20, $24 - addq $8, $25, $8 - addq $22, $27, $22 - addq $23, $24, $23 - cmpult $22, $27, $18 - cmpult $23, $24, $17 - addq $23, $18, $23 - addq $8, $17, $8 - mulq $4, $0, $21 - umulh $4, $0, $28 - cmplt $21, $31, $19 - cmplt $28, $31, $20 - addq $21, $21, $21 - addq $28, $28, $28 - addq $28, $19, $28 - addq $8, $20, $8 - addq $22, $21, $22 - addq $23, $28, $23 - cmpult $22, $21, $25 - cmpult $23, $28, $27 - addq $23, $25, $23 - addq $8, $27, $8 - stq $22, 32($16) - bis $31, $31, $22 - mulq $3, $2, $24 - umulh $3, $2, $18 - cmplt $24, $31, $17 - cmplt $18, $31, $19 - addq $24, $24, $24 - addq $18, $18, $18 - addq $18, $17, $18 - addq $22, $19, $22 - addq $23, $24, $23 - addq $8, $18, $8 - cmpult $23, $24, $20 - cmpult $8, $18, $21 - addq $8, $20, $8 - addq $22, $21, $22 - mulq $4, $1, $28 - umulh $4, $1, $25 - cmplt $28, $31, $27 - cmplt $25, $31, $17 - addq $28, $28, $28 - addq $25, $25, $25 - addq $25, $27, $25 - addq $22, $17, $22 - addq $23, $28, $23 - addq $8, $25, $8 - cmpult $23, $28, $19 - cmpult $8, $25, $24 - addq $8, $19, $8 - addq $22, $24, $22 - mulq $5, $0, $18 - umulh $5, $0, $20 - cmplt $18, $31, $21 - cmplt $20, $31, $27 - addq $18, $18, $18 - addq $20, $20, $20 - addq $20, $21, $20 - addq $22, $27, $22 - addq $23, $18, $23 - addq $8, $20, $8 - cmpult $23, $18, $17 - cmpult $8, $20, $28 - addq $8, $17, $8 - addq $22, $28, $22 - stq $23, 40($16) - bis $31, $31, $23 - mulq $3, $3, $25 - umulh $3, $3, $19 - addq $8, $25, $8 - addq $22, $19, $22 - cmpult $8, $25, $24 - cmpult $22, $19, $21 - addq $22, $24, $22 - addq $23, $21, $23 - mulq $4, $2, $27 - umulh $4, $2, $18 - cmplt $27, $31, $20 - cmplt $18, $31, $17 - addq $27, $27, $27 - addq $18, $18, $18 - addq $18, $20, $18 - addq $23, $17, $23 - addq $8, $27, $8 - addq $22, $18, $22 - cmpult $8, $27, $28 - cmpult $22, $18, $25 - addq $22, $28, $22 - addq $23, $25, $23 - mulq $5, $1, $19 - umulh $5, $1, $24 - cmplt $19, $31, $21 - cmplt $24, $31, $20 - addq $19, $19, $19 - addq $24, $24, $24 - addq $24, $21, $24 - addq $23, $20, $23 - addq $8, $19, $8 - addq $22, $24, $22 - cmpult $8, $19, $17 - cmpult $22, $24, $27 - addq $22, $17, $22 - addq $23, $27, $23 - mulq $6, $0, $18 - umulh $6, $0, $28 - cmplt $18, $31, $25 - cmplt $28, $31, $21 - addq $18, $18, $18 - addq $28, $28, $28 - addq $28, $25, $28 - addq $23, $21, $23 - addq $8, $18, $8 - addq $22, $28, $22 - cmpult $8, $18, $20 - cmpult $22, $28, $19 - addq $22, $20, $22 - addq $23, $19, $23 - stq $8, 48($16) - bis $31, $31, $8 - mulq $4, $3, $24 - umulh $4, $3, $17 - cmplt $24, $31, $27 - cmplt $17, $31, $25 - addq $24, $24, $24 - addq $17, $17, $17 - addq $17, $27, $17 - addq $8, $25, $8 - addq $22, $24, $22 - addq $23, $17, $23 - cmpult $22, $24, $21 - cmpult $23, $17, $18 - addq $23, $21, $23 - addq $8, $18, $8 - mulq $5, $2, $28 - umulh $5, $2, $20 - cmplt $28, $31, $19 - cmplt $20, $31, $27 - addq $28, $28, $28 - addq $20, $20, $20 - addq $20, $19, $20 - addq $8, $27, $8 - addq $22, $28, $22 - addq $23, $20, $23 - cmpult $22, $28, $25 - cmpult $23, $20, $24 - addq $23, $25, $23 - addq $8, $24, $8 - mulq $6, $1, $17 - umulh $6, $1, $21 - cmplt $17, $31, $18 - cmplt $21, $31, $19 - addq $17, $17, $17 - addq $21, $21, $21 - addq $21, $18, $21 - addq $8, $19, $8 - addq $22, $17, $22 - addq $23, $21, $23 - cmpult $22, $17, $27 - cmpult $23, $21, $28 - addq $23, $27, $23 - addq $8, $28, $8 - mulq $7, $0, $20 - umulh $7, $0, $25 - cmplt $20, $31, $24 - cmplt $25, $31, $18 - addq $20, $20, $20 - addq $25, $25, $25 - addq $25, $24, $25 - addq $8, $18, $8 - addq $22, $20, $22 - addq $23, $25, $23 - cmpult $22, $20, $19 - cmpult $23, $25, $17 - addq $23, $19, $23 - addq $8, $17, $8 - stq $22, 56($16) - bis $31, $31, $22 - mulq $4, $4, $21 - umulh $4, $4, $27 - addq $23, $21, $23 - addq $8, $27, $8 - cmpult $23, $21, $28 - cmpult $8, $27, $24 - addq $8, $28, $8 - addq $22, $24, $22 - mulq $5, $3, $18 - umulh $5, $3, $20 - cmplt $18, $31, $25 - cmplt $20, $31, $19 - addq $18, $18, $18 - addq $20, $20, $20 - addq $20, $25, $20 - addq $22, $19, $22 - addq $23, $18, $23 - addq $8, $20, $8 - cmpult $23, $18, $17 - cmpult $8, $20, $21 - addq $8, $17, $8 - addq $22, $21, $22 - mulq $6, $2, $27 - umulh $6, $2, $28 - cmplt $27, $31, $24 - cmplt $28, $31, $25 - addq $27, $27, $27 - addq $28, $28, $28 - addq $28, $24, $28 - addq $22, $25, $22 - addq $23, $27, $23 - addq $8, $28, $8 - cmpult $23, $27, $19 - cmpult $8, $28, $18 - addq $8, $19, $8 - addq $22, $18, $22 - mulq $7, $1, $20 - umulh $7, $1, $17 - cmplt $20, $31, $21 - cmplt $17, $31, $24 - addq $20, $20, $20 - addq $17, $17, $17 - addq $17, $21, $17 - addq $22, $24, $22 - addq $23, $20, $23 - addq $8, $17, $8 - cmpult $23, $20, $25 - cmpult $8, $17, $27 - addq $8, $25, $8 - addq $22, $27, $22 - stq $23, 64($16) - bis $31, $31, $23 - mulq $5, $4, $28 - umulh $5, $4, $19 - cmplt $28, $31, $18 - cmplt $19, $31, $21 - addq $28, $28, $28 - addq $19, $19, $19 - addq $19, $18, $19 - addq $23, $21, $23 - addq $8, $28, $8 - addq $22, $19, $22 - cmpult $8, $28, $24 - cmpult $22, $19, $20 - addq $22, $24, $22 - addq $23, $20, $23 - mulq $6, $3, $17 - umulh $6, $3, $25 - cmplt $17, $31, $27 - cmplt $25, $31, $18 - addq $17, $17, $17 - addq $25, $25, $25 - addq $25, $27, $25 - addq $23, $18, $23 - addq $8, $17, $8 - addq $22, $25, $22 - cmpult $8, $17, $21 - cmpult $22, $25, $28 - addq $22, $21, $22 - addq $23, $28, $23 - mulq $7, $2, $19 - umulh $7, $2, $24 - cmplt $19, $31, $20 - cmplt $24, $31, $27 - addq $19, $19, $19 - addq $24, $24, $24 - addq $24, $20, $24 - addq $23, $27, $23 - addq $8, $19, $8 - addq $22, $24, $22 - cmpult $8, $19, $18 - cmpult $22, $24, $17 - addq $22, $18, $22 - addq $23, $17, $23 - stq $8, 72($16) - bis $31, $31, $8 - mulq $5, $5, $25 - umulh $5, $5, $21 - addq $22, $25, $22 - addq $23, $21, $23 - cmpult $22, $25, $28 - cmpult $23, $21, $20 - addq $23, $28, $23 - addq $8, $20, $8 - mulq $6, $4, $27 - umulh $6, $4, $19 - cmplt $27, $31, $24 - cmplt $19, $31, $18 - addq $27, $27, $27 - addq $19, $19, $19 - addq $19, $24, $19 - addq $8, $18, $8 - addq $22, $27, $22 - addq $23, $19, $23 - cmpult $22, $27, $17 - cmpult $23, $19, $25 - addq $23, $17, $23 - addq $8, $25, $8 - mulq $7, $3, $21 - umulh $7, $3, $28 - cmplt $21, $31, $20 - cmplt $28, $31, $24 - addq $21, $21, $21 - addq $28, $28, $28 - addq $28, $20, $28 - addq $8, $24, $8 - addq $22, $21, $22 - addq $23, $28, $23 - cmpult $22, $21, $18 - cmpult $23, $28, $27 - addq $23, $18, $23 - addq $8, $27, $8 - stq $22, 80($16) - bis $31, $31, $22 - mulq $6, $5, $19 - umulh $6, $5, $17 - cmplt $19, $31, $25 - cmplt $17, $31, $20 - addq $19, $19, $19 - addq $17, $17, $17 - addq $17, $25, $17 - addq $22, $20, $22 - addq $23, $19, $23 - addq $8, $17, $8 - cmpult $23, $19, $24 - cmpult $8, $17, $21 - addq $8, $24, $8 - addq $22, $21, $22 - mulq $7, $4, $28 - umulh $7, $4, $18 - cmplt $28, $31, $27 - cmplt $18, $31, $25 - addq $28, $28, $28 - addq $18, $18, $18 - addq $18, $27, $18 - addq $22, $25, $22 - addq $23, $28, $23 - addq $8, $18, $8 - cmpult $23, $28, $20 - cmpult $8, $18, $19 - addq $8, $20, $8 - addq $22, $19, $22 - stq $23, 88($16) - bis $31, $31, $23 - mulq $6, $6, $17 - umulh $6, $6, $24 - addq $8, $17, $8 - addq $22, $24, $22 - cmpult $8, $17, $21 - cmpult $22, $24, $27 - addq $22, $21, $22 - addq $23, $27, $23 - mulq $7, $5, $25 - umulh $7, $5, $28 - cmplt $25, $31, $18 - cmplt $28, $31, $20 - addq $25, $25, $25 - addq $28, $28, $28 - addq $28, $18, $28 - addq $23, $20, $23 - addq $8, $25, $8 - addq $22, $28, $22 - cmpult $8, $25, $19 - cmpult $22, $28, $17 - addq $22, $19, $22 - addq $23, $17, $23 - stq $8, 96($16) - bis $31, $31, $8 - mulq $7, $6, $24 - umulh $7, $6, $21 - cmplt $24, $31, $27 - cmplt $21, $31, $18 - addq $24, $24, $24 - addq $21, $21, $21 - addq $21, $27, $21 - addq $8, $18, $8 - addq $22, $24, $22 - addq $23, $21, $23 - cmpult $22, $24, $20 - cmpult $23, $21, $25 - addq $23, $20, $23 - addq $8, $25, $8 - stq $22, 104($16) - bis $31, $31, $22 - mulq $7, $7, $28 - umulh $7, $7, $19 - addq $23, $28, $23 - addq $8, $19, $8 - cmpult $23, $28, $17 - cmpult $8, $19, $27 - addq $8, $17, $8 - addq $22, $27, $22 - stq $23, 112($16) - stq $8, 120($16) - ret $31,($26),1 - .end bn_sqr_comba8 diff --git a/crypto/bn/asm/alpha.s.works b/crypto/bn/asm/alpha.s.works deleted file mode 100644 index ee6c587..0000000 --- a/crypto/bn/asm/alpha.s.works +++ /dev/null @@ -1,533 +0,0 @@ - - # DEC Alpha assember - # The bn_div64 is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div64. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - .align 3 -$42: - mulq $20,$19,$5 # 1 2 1 ###### - ldq $21,8($17) # 2 1 - ldq $2,8($16) # 2 1 - umulh $20,$19,$20 # 1 2 ###### - ldq $27,16($17) # 3 1 - ldq $3,16($16) # 3 1 - mulq $21,$19,$6 # 2 2 1 ###### - ldq $28,24($17) # 4 1 - addq $1,$5,$1 # 1 2 2 - ldq $4,24($16) # 4 1 - umulh $21,$19,$21 # 2 2 ###### - cmpult $1,$5,$22 # 1 2 3 1 - addq $20,$22,$20 # 1 3 1 - addq $1,$0,$1 # 1 2 3 1 - mulq $27,$19,$7 # 3 2 1 ###### - cmpult $1,$0,$0 # 1 2 3 2 - addq $2,$6,$2 # 2 2 2 - addq $20,$0,$0 # 1 3 2 - cmpult $2,$6,$23 # 2 2 3 1 - addq $21,$23,$21 # 2 3 1 - umulh $27,$19,$27 # 3 2 ###### - addq $2,$0,$2 # 2 2 3 1 - cmpult $2,$0,$0 # 2 2 3 2 - subq $18,4,$18 - mulq $28,$19,$8 # 4 2 1 ###### - addq $21,$0,$0 # 2 3 2 - addq $3,$7,$3 # 3 2 2 - addq $16,32,$16 - cmpult $3,$7,$24 # 3 2 3 1 - stq $1,-32($16) # 1 2 4 - umulh $28,$19,$28 # 4 2 ###### - addq $27,$24,$27 # 3 3 1 - addq $3,$0,$3 # 3 2 3 1 - stq $2,-24($16) # 2 2 4 - cmpult $3,$0,$0 # 3 2 3 2 - stq $3,-16($16) # 3 2 4 - addq $4,$8,$4 # 4 2 2 - addq $27,$0,$0 # 3 3 2 - cmpult $4,$8,$25 # 4 2 3 1 - addq $17,32,$17 - addq $28,$25,$28 # 4 3 1 - addq $4,$0,$4 # 4 2 3 1 - cmpult $4,$0,$0 # 4 2 3 2 - stq $4,-8($16) # 4 2 4 - addq $28,$0,$0 # 4 3 2 - blt $18,$43 - - ldq $20,0($17) # 1 1 - ldq $1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $20,0($17) # 4 1 - ldq $1,0($16) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $20,$19,$20 # 4 2 - addq $1,$5,$1 # 4 2 2 - cmpult $1,$5,$22 # 4 2 3 1 - addq $20,$22,$20 # 4 3 1 - addq $1,$0,$1 # 4 2 3 1 - cmpult $1,$0,$0 # 4 2 3 2 - addq $20,$0,$0 # 4 3 2 - stq $1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$0 - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$142: - - mulq $20,$19,$5 # 1 2 1 ##### - ldq $21,8($17) # 2 1 - ldq $27,16($17) # 3 1 - umulh $20,$19,$20 # 1 2 ##### - ldq $28,24($17) # 4 1 - mulq $21,$19,$6 # 2 2 1 ##### - addq $5,$0,$5 # 1 2 3 1 - subq $18,4,$18 - cmpult $5,$0,$0 # 1 2 3 2 - umulh $21,$19,$21 # 2 2 ##### - addq $20,$0,$0 # 1 3 2 - addq $17,32,$17 - addq $6,$0,$6 # 2 2 3 1 - mulq $27,$19,$7 # 3 2 1 ##### - cmpult $6,$0,$0 # 2 2 3 2 - addq $21,$0,$0 # 2 3 2 - addq $16,32,$16 - umulh $27,$19,$27 # 3 2 ##### - stq $5,-32($16) # 1 2 4 - mulq $28,$19,$8 # 4 2 1 ##### - addq $7,$0,$7 # 3 2 3 1 - stq $6,-24($16) # 2 2 4 - cmpult $7,$0,$0 # 3 2 3 2 - umulh $28,$19,$28 # 4 2 ##### - addq $27,$0,$0 # 3 3 2 - stq $7,-16($16) # 3 2 4 - addq $8,$0,$8 # 4 2 3 1 - cmpult $8,$0,$0 # 4 2 3 2 - - addq $28,$0,$0 # 4 3 2 - - stq $8,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $20,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $20,0($17) # 4 1 - mulq $20,$19,$5 # 4 2 1 - subq $18,1,$18 - umulh $20,$19,$20 # 4 2 - addq $5,$0,$5 # 4 2 3 1 - addq $16,8,$16 - cmpult $5,$0,$0 # 4 2 3 2 - addq $17,8,$17 - addq $20,$0,$0 # 4 3 2 - stq $5,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $20,0($17) # 1 1 - .align 3 -$542: - mulq $20,$20,$5 ###### - ldq $21,8($17) # 1 1 - subq $18,4 - umulh $20,$20,$1 ###### - ldq $27,16($17) # 1 1 - mulq $21,$21,$6 ###### - ldq $28,24($17) # 1 1 - stq $5,0($16) # r[0] - umulh $21,$21,$2 ###### - stq $1,8($16) # r[1] - mulq $27,$27,$7 ###### - stq $6,16($16) # r[0] - umulh $27,$27,$3 ###### - stq $2,24($16) # r[1] - mulq $28,$28,$8 ###### - stq $7,32($16) # r[0] - umulh $28,$28,$4 ###### - stq $3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $8,-16($16) # r[0] - stq $4,-8($16) # r[1] - - blt $18,$543 - ldq $20,0($17) # 1 1 - br $542 - -$442: - ldq $20,0($17) # a[0] - mulq $20,$20,$5 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $20,$20,$1 # a[0]*w high part r3 - stq $5,-16($16) # r[0] - stq $1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$0 # carry = 0 - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - .align 3 -$901: - addq $1,$5,$1 # r=a+b; - ldq $6,8($17) # a[1] - cmpult $1,$5,$22 # did we overflow? - ldq $2,8($18) # b[1] - addq $1,$0,$1 # c+= overflow - ldq $7,16($17) # a[2] - cmpult $1,$0,$0 # overflow? - ldq $3,16($18) # b[2] - addq $0,$22,$0 - ldq $8,24($17) # a[3] - addq $2,$6,$2 # r=a+b; - ldq $4,24($18) # b[3] - cmpult $2,$6,$23 # did we overflow? - addq $3,$7,$3 # r=a+b; - addq $2,$0,$2 # c+= overflow - cmpult $3,$7,$24 # did we overflow? - cmpult $2,$0,$0 # overflow? - addq $4,$8,$4 # r=a+b; - addq $0,$23,$0 - cmpult $4,$8,$25 # did we overflow? - addq $3,$0,$3 # c+= overflow - stq $1,0($16) # r[0]=c - cmpult $3,$0,$0 # overflow? - stq $2,8($16) # r[1]=c - addq $0,$24,$0 - stq $3,16($16) # r[2]=c - addq $4,$0,$4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $4,$0,$0 # overflow? - addq $17,32,$17 # a++ - addq $0,$25,$0 - stq $4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $5,0($17) # a[0] - ldq $1,0($18) # b[1] - addq $1,$5,$1 # r=a+b; - subq $19,1,$19 # loop-- - addq $1,$0,$1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $1,$5,$22 # did we overflow? - cmpult $1,$0,$0 # overflow? - addq $18,8,$18 # b++ - stq $1,0($16) # r[0]=c - addq $0,$22,$0 - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 - - .set noat - .text - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19, 4, $19 - bis $31, $31, $0 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) -$101: - ldq $3, 8($17) - cmpult $1, $2, $4 - ldq $5, 8($18) - subq $1, $2, $1 - ldq $6, 16($17) - cmpult $1, $0, $2 - ldq $7, 16($18) - subq $1, $0, $23 - ldq $8, 24($17) - addq $2, $4, $0 - cmpult $3, $5, $24 - subq $3, $5, $3 - ldq $22, 24($18) - cmpult $3, $0, $5 - subq $3, $0, $25 - addq $5, $24, $0 - cmpult $6, $7, $27 - subq $6, $7, $6 - stq $23, 0($16) - cmpult $6, $0, $7 - subq $6, $0, $28 - addq $7, $27, $0 - cmpult $8, $22, $21 - subq $8, $22, $8 - stq $25, 8($16) - cmpult $8, $0, $22 - subq $8, $0, $20 - addq $22, $21, $0 - stq $28, 16($16) - subq $19, 4, $19 - stq $20, 24($16) - addq $17, 32, $17 - addq $18, 32, $18 - addq $16, 32, $16 - blt $19, $100 - ldq $1, 0($17) - ldq $2, 0($18) - br $101 -$102: - ldq $1, 0($17) - ldq $2, 0($18) - cmpult $1, $2, $27 - subq $1, $2, $1 - cmpult $1, $0, $2 - subq $1, $0, $1 - stq $1, 0($16) - addq $2, $27, $0 - addq $17, 8, $17 - addq $18, 8, $18 - addq $16, 8, $16 - subq $19, 1, $19 - bgt $19, $102 - ret $31,($26),1 -$100: - addq $19, 4, $19 - bgt $19, $102 -$103: - ret $31,($26),1 - .end bn_sub_words diff --git a/crypto/bn/asm/alpha.works/add.pl b/crypto/bn/asm/alpha.works/add.pl deleted file mode 100644 index 4dc76e6..0000000 --- a/crypto/bn/asm/alpha.works/add.pl +++ /dev/null @@ -1,119 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? - &add($cc,$t0,$cc); # add the borrows - &st($o0,&QWPw(0,$rp)); # save - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/div.pl b/crypto/bn/asm/alpha.works/div.pl deleted file mode 100644 index 7ec1443..0000000 --- a/crypto/bn/asm/alpha.works/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div64 - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div64 - .ent bn_div64 -bn_div64: - ldgp $29,0($27) -bn_div64..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div64 -EOF - &asm_add($data); - } - -1; diff --git a/crypto/bn/asm/alpha.works/mul.pl b/crypto/bn/asm/alpha.works/mul.pl deleted file mode 100644 index b182bae..0000000 --- a/crypto/bn/asm/alpha.works/mul.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$word,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($l0,$cc,$l0); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &cmpult($l0,$cc,$cc); - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/mul_add.pl b/crypto/bn/asm/alpha.works/mul_add.pl deleted file mode 100644 index e37f631..0000000 --- a/crypto/bn/asm/alpha.works/mul_add.pl +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - &mul($a0,$word,($l0)=&NR(1)); - &sub($count,1,$count); - &add($ap,$QWS,$ap); - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - &add($r0,$l0,$r0); - &add($rp,$QWS,$rp); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &add($h0,$cc,$cc); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/mul_c4.pl b/crypto/bn/asm/alpha.works/mul_c4.pl deleted file mode 100644 index 5efd201..0000000 --- a/crypto/bn/asm/alpha.works/mul_c4.pl +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - &muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); - &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/mul_c4.works.pl b/crypto/bn/asm/alpha.works/mul_c4.works.pl deleted file mode 100644 index 79d86dd..0000000 --- a/crypto/bn/asm/alpha.works/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/mul_c8.pl b/crypto/bn/asm/alpha.works/mul_c8.pl deleted file mode 100644 index 525ca74..0000000 --- a/crypto/bn/asm/alpha.works/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/sqr.pl b/crypto/bn/asm/alpha.works/sqr.pl deleted file mode 100644 index a55b696..0000000 --- a/crypto/bn/asm/alpha.works/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/sqr_c4.pl b/crypto/bn/asm/alpha.works/sqr_c4.pl deleted file mode 100644 index bf33f5b..0000000 --- a/crypto/bn/asm/alpha.works/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/sqr_c8.pl b/crypto/bn/asm/alpha.works/sqr_c8.pl deleted file mode 100644 index b4afe08..0000000 --- a/crypto/bn/asm/alpha.works/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha.works/sub.pl b/crypto/bn/asm/alpha.works/sub.pl deleted file mode 100644 index d998da5..0000000 --- a/crypto/bn/asm/alpha.works/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/add.pl b/crypto/bn/asm/alpha/add.pl deleted file mode 100644 index 13bf516..0000000 --- a/crypto/bn/asm/alpha/add.pl +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_add_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - -########################################################## - &set_label("loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &ld(($b0)=&NR(1),&QWPw(0,$bp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &ld(($b1)=&NR(1),&QWPw(1,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &ld(($b2)=&NR(1),&QWPw(2,$bp)); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &ld(($b3)=&NR(1),&QWPw(3,$bp)); - &st($o0,&QWPw(0,$rp)); &FR($o0); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &st($o1,&QWPw(0,$rp)); &FR($o1); - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &cmpult($o3,$cc,$cc); - &st($o3,&QWPw(0,$rp)); &FR($o3); - &add($cc,$t3,$cc); &FR($t3); - - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - ### - &bge($count,&label("loop")); - ### - &br(&label("finish")); -################################################## - # Do the last 0..3 words - - ($t0,$o0)=&NR(2); - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($a0,$b0,$o0); - &sub($count,1,$count); - &cmpult($o0,$b0,$t0); # will we borrow? - &add($o0,$cc,$o0); # will we borrow? - &cmpult($o0,$cc,$cc); # will we borrow? - &add($rp,$QWS,$rp); - &st($o0,&QWPw(-1,$rp)); # save - &add($cc,$t0,$cc); # add the borrows - - ### - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($o0,$t0,$a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/div.pl b/crypto/bn/asm/alpha/div.pl deleted file mode 100644 index e9e6808..0000000 --- a/crypto/bn/asm/alpha/div.pl +++ /dev/null @@ -1,144 +0,0 @@ -#!/usr/local/bin/perl - -sub bn_div_words - { - local($data)=<<'EOF'; - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .set noreorder - .set volatile - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words - ldgp $29,0($27) -bn_div_words.ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$9119 - lda $0,-1 - br $31,$9136 - .align 4 -$9119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$9120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$9120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$9120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$9122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$9122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$9123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$9126 - zapnot $7,15,$27 - br $31,$9127 - .align 4 -$9126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$9127: - srl $10,32,$4 - .align 5 -$9128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$9129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$9129 - subq $27,1,$27 - br $31,$9128 - .align 4 -$9129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$9134 - addq $9,$11,$9 - subq $27,1,$27 -$9134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$9124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$9123 - .align 4 -$9124: - bis $13,$27,$0 -$9136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - &asm_add($data); - } - -1; diff --git a/crypto/bn/asm/alpha/mul.pl b/crypto/bn/asm/alpha/mul.pl deleted file mode 100644 index 76c9265..0000000 --- a/crypto/bn/asm/alpha/mul.pl +++ /dev/null @@ -1,104 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - ### wait 8 - &muh($a1,$word,($h1)=&NR(1)); &FR($a1); - &add($l0,$cc,$l0); ### wait 8 - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &cmpult($l0,$cc,$cc); ### wait 8 - &muh($a2,$word,($h2)=&NR(1)); &FR($a2); - &add($h0,$cc,$cc); &FR($h0); ### wait 8 - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($l1,$cc,$l1); ### wait 8 - &st($l0,&QWPw(0,$rp)); &FR($l0); - &cmpult($l1,$cc,$cc); ### wait 8 - &muh($a3,$word,($h3)=&NR(1)); &FR($a3); - &add($h1,$cc,$cc); &FR($h1); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($l2,$cc,$l2); - &st($l1,&QWPw(1,$rp)); &FR($l1); - &cmpult($l2,$cc,$cc); - &add($h2,$cc,$cc); &FR($h2); - &sub($count,4,$count); # count-=4 - &st($l2,&QWPw(2,$rp)); &FR($l2); - &add($l3,$cc,$l3); - &cmpult($l3,$cc,$cc); - &add($bp,4*$QWS,$bp); # count+=4 - &add($h3,$cc,$cc); &FR($h3); - &add($ap,4*$QWS,$ap); # count+=4 - &st($l3,&QWPw(3,$rp)); &FR($l3); - &add($rp,4*$QWS,$rp); # count+=4 - ### - &blt($count,&label("finish")); - ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap)); - &br(&label("finish")); -################################################## - -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - ### - ### - ### - &muh($a0,$word,($h0)=&NR(1)); - ### Wait 8 for next mul issue - &mul($a0,$word,($l0)=&NR(1)); &FR($a0) - &add($ap,$QWS,$ap); - ### Loose 12 until result is available - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &add($l0,$cc,$l0); - ### - &st($l0,&QWPw(-1,$rp)); &FR($l0); - &cmpult($l0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/mul_add.pl b/crypto/bn/asm/alpha/mul_add.pl deleted file mode 100644 index 0d6df69..0000000 --- a/crypto/bn/asm/alpha/mul_add.pl +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_add_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - $word=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - ### - &blt($count,&label("finish")); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - &ld(($r0)=&NR(1),&QWPw(0,$rp)); - &ld(($a1)=&NR(1),&QWPw(1,$ap)); - &muh($a0,$word,($h0)=&NR(1)); - &ld(($r1)=&NR(1),&QWPw(1,$rp)); - &ld(($a2)=&NR(1),&QWPw(2,$ap)); - ### - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &ld(($r2)=&NR(1),&QWPw(2,$rp)); - &muh($a1,$word,($h1)=&NR(1)); - &ld(($a3)=&NR(1),&QWPw(3,$ap)); - &mul($a1,$word,($l1)=&NR(1)); &FR($a1); - &ld(($r3)=&NR(1),&QWPw(3,$rp)); - &add($r0,$l0,$r0); - &add($r1,$l1,$r1); - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1); - &muh($a2,$word,($h2)=&NR(1)); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h1,$t1,$h1); &FR($t1); - &add($h0,$cc,$cc); &FR($h0); - &mul($a2,$word,($l2)=&NR(1)); &FR($a2); - &add($r1,$cc,$r1); - &cmpult($r1,$cc,$cc); - &add($r2,$l2,$r2); - &add($h1,$cc,$cc); &FR($h1); - &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2); - &muh($a3,$word,($h3)=&NR(1)); - &add($r2,$cc,$r2); - &st($r0,&QWPw(0,$rp)); &FR($r0); - &add($h2,$t2,$h2); &FR($t2); - &st($r1,&QWPw(1,$rp)); &FR($r1); - &cmpult($r2,$cc,$cc); - &mul($a3,$word,($l3)=&NR(1)); &FR($a3); - &add($h2,$cc,$cc); &FR($h2); - &st($r2,&QWPw(2,$rp)); &FR($r2); - &sub($count,4,$count); # count-=4 - &add($rp,4*$QWS,$rp); # count+=4 - &add($r3,$l3,$r3); - &add($ap,4*$QWS,$ap); # count+=4 - &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3); - &add($r3,$cc,$r3); - &add($h3,$t3,$h3); &FR($t3); - &cmpult($r3,$cc,$cc); - &st($r3,&QWPw(-1,$rp)); &FR($r3); - &add($h3,$cc,$cc); &FR($h3); - - ### - &blt($count,&label("finish")); - &ld(($a0)=&NR(1),&QWPw(0,$ap)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b - ### - ### - &muh($a0,$word,($h0)=&NR(1)); &FR($a0); - ### wait 8 - &mul($a0,$word,($l0)=&NR(1)); &FR($a0); - &add($rp,$QWS,$rp); - &add($ap,$QWS,$ap); - &sub($count,1,$count); - ### wait 3 until l0 is available - &add($r0,$l0,$r0); - ### - &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0); - &add($r0,$cc,$r0); - &add($h0,$t0,$h0); &FR($t0); - &cmpult($r0,$cc,$cc); - &add($h0,$cc,$cc); &FR($h0); - - &st($r0,&QWPw(-1,$rp)); &FR($r0); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/mul_c4.pl b/crypto/bn/asm/alpha/mul_c4.pl deleted file mode 100644 index 9cc876d..0000000 --- a/crypto/bn/asm/alpha/mul_c4.pl +++ /dev/null @@ -1,215 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -# upto - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &mul($a[0],$b[0],($r00)=&NR(1)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &muh($a[0],$b[0],($r01)=&NR(1)); - &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &mul($a[0],$b[1],($r02)=&NR(1)); - - ($R,$H1,$H2)=&NR(3); - - &st($r00,&QWPw(0,$rp)); &FR($r00); - - &mov("zero",$R); - &mul($a[1],$b[0],($r03)=&NR(1)); - - &mov("zero",$H1); - &mov("zero",$H0); - &add($R,$r01,$R); - &muh($a[0],$b[1],($r04)=&NR(1)); - &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01); - &add($R,$r02,$R); - &add($H1,$t01,$H1) &FR($t01); - &muh($a[1],$b[0],($r05)=&NR(1)); - &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02); - &add($R,$r03,$R); - &add($H2,$t02,$H2) &FR($t02); - &mul($a[0],$b[2],($r06)=&NR(1)); - &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03); - &add($H1,$t03,$H1) &FR($t03); - &st($R,&QWPw(1,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r04,$R); - &mov("zero",$H2); - &mul($a[1],$b[1],($r07)=&NR(1)); - &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04); - &add($R,$r05,$R); - &add($H1,$t04,$H1) &FR($t04); - &mul($a[2],$b[0],($r08)=&NR(1)); - &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05); - &add($R,$r01,$R); - &add($H2,$t05,$H2) &FR($t05); - &muh($a[0],$b[2],($r09)=&NR(1)); - &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06); - &add($R,$r07,$R); - &add($H1,$t06,$H1) &FR($t06); - &muh($a[1],$b[1],($r10)=&NR(1)); - &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07); - &add($R,$r08,$R); - &add($H2,$t07,$H2) &FR($t07); - &muh($a[2],$b[0],($r11)=&NR(1)); - &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08); - &add($H1,$t08,$H1) &FR($t08); - &st($R,&QWPw(2,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r09,$R); - &mov("zero",$H2); - &mul($a[0],$b[3],($r12)=&NR(1)); - &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09); - &add($R,$r10,$R); - &add($H1,$t09,$H1) &FR($t09); - &mul($a[1],$b[2],($r13)=&NR(1)); - &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10); - &add($R,$r11,$R); - &add($H1,$t10,$H1) &FR($t10); - &mul($a[2],$b[1],($r14)=&NR(1)); - &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11); - &add($R,$r12,$R); - &add($H1,$t11,$H1) &FR($t11); - &mul($a[3],$b[0],($r15)=&NR(1)); - &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12); - &add($R,$r13,$R); - &add($H1,$t12,$H1) &FR($t12); - &muh($a[0],$b[3],($r16)=&NR(1)); - &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13); - &add($R,$r14,$R); - &add($H1,$t13,$H1) &FR($t13); - &muh($a[1],$b[2],($r17)=&NR(1)); - &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14); - &add($R,$r15,$R); - &add($H1,$t14,$H1) &FR($t14); - &muh($a[2],$b[1],($r18)=&NR(1)); - &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15); - &add($H1,$t15,$H1) &FR($t15); - &st($R,&QWPw(3,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r16,$R); - &mov("zero",$H2); - &muh($a[3],$b[0],($r19)=&NR(1)); - &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16); - &add($R,$r17,$R); - &add($H1,$t16,$H1) &FR($t16); - &mul($a[1],$b[3],($r20)=&NR(1)); - &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17); - &add($R,$r18,$R); - &add($H1,$t17,$H1) &FR($t17); - &mul($a[2],$b[2],($r21)=&NR(1)); - &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18); - &add($R,$r19,$R); - &add($H1,$t18,$H1) &FR($t18); - &mul($a[3],$b[1],($r22)=&NR(1)); - &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19); - &add($R,$r20,$R); - &add($H1,$t19,$H1) &FR($t19); - &muh($a[1],$b[3],($r23)=&NR(1)); - &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20); - &add($R,$r21,$R); - &add($H1,$t20,$H1) &FR($t20); - &muh($a[2],$b[2],($r24)=&NR(1)); - &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21); - &add($R,$r22,$R); - &add($H1,$t21,$H1) &FR($t21); - &muh($a[3],$b[1],($r25)=&NR(1)); - &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22); - &add($H1,$t22,$H1) &FR($t22); - &st($R,&QWPw(4,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r23,$R); - &mov("zero",$H2); - &mul($a[2],$b[3],($r26)=&NR(1)); - &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23); - &add($R,$r24,$R); - &add($H1,$t23,$H1) &FR($t23); - &mul($a[3],$b[2],($r27)=&NR(1)); - &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24); - &add($R,$r25,$R); - &add($H1,$t24,$H1) &FR($t24); - &muh($a[2],$b[3],($r28)=&NR(1)); - &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25); - &add($R,$r26,$R); - &add($H1,$t25,$H1) &FR($t25); - &muh($a[3],$b[2],($r29)=&NR(1)); - &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26); - &add($R,$r27,$R); - &add($H1,$t26,$H1) &FR($t26); - &mul($a[3],$b[3],($r30)=&NR(1)); - &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27); - &add($H1,$t27,$H1) &FR($t27); - &st($R,&QWPw(5,$rp)); - &add($H1,$H2,$R); - - &mov("zero",$H1); - &add($R,$r28,$R); - &mov("zero",$H2); - &muh($a[3],$b[3],($r31)=&NR(1)); - &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28); - &add($R,$r29,$R); - &add($H1,$t28,$H1) &FR($t28); - ############ - &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29); - &add($R,$r30,$R); - &add($H1,$t29,$H1) &FR($t29); - ############ - &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30); - &add($H1,$t30,$H1) &FR($t30); - &st($R,&QWPw(6,$rp)); - &add($H1,$H2,$R); - - &add($R,$r31,$R); &FR($r31); - &st($R,&QWPw(7,$rp)); - - &FR($R,$H1,$H2); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/mul_c4.works.pl b/crypto/bn/asm/alpha/mul_c4.works.pl deleted file mode 100644 index 79d86dd..0000000 --- a/crypto/bn/asm/alpha/mul_c4.works.pl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub mul_add_c - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - -print STDERR "count=$cnt\n"; $cnt++; - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &add($t1,$h1,$h1); &FR($t1); - &add($c1,$h1,$c1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub bn_mul_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &FR($c0,$c1,$c2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/mul_c8.pl b/crypto/bn/asm/alpha/mul_c8.pl deleted file mode 100644 index 525ca74..0000000 --- a/crypto/bn/asm/alpha/mul_c8.pl +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_mul_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(3); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - - &function_begin($name,""); - - &comment(""); - - &stack_push(2); - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($b[0])=&NR(1),&QWPw(0,$bp)); - &st($reg_s0,&swtmp(0)); &FR($reg_s0); - &st($reg_s1,&swtmp(1)); &FR($reg_s1); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($b[1])=&NR(1),&QWPw(1,$bp)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($b[2])=&NR(1),&QWPw(2,$bp)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($b[3])=&NR(1),&QWPw(3,$bp)); - &ld(($a[4])=&NR(1),&QWPw(1,$ap)); - &ld(($b[4])=&NR(1),&QWPw(1,$bp)); - &ld(($a[5])=&NR(1),&QWPw(1,$ap)); - &ld(($b[5])=&NR(1),&QWPw(1,$bp)); - &ld(($a[6])=&NR(1),&QWPw(1,$ap)); - &ld(($b[6])=&NR(1),&QWPw(1,$bp)); - &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap); - &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp); - - ($c0,$c1,$c2)=&NR(3); - &mov("zero",$c2); - &mul($a[0],$b[0],$c0); - &muh($a[0],$b[0],$c1); - &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[1],$c0,$c1,$c2); - &mul_add_c($a[1],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[2],$c0,$c1,$c2); - &mul_add_c($a[1],$b[1],$c0,$c1,$c2); - &mul_add_c($a[2],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[3],$c0,$c1,$c2); - &mul_add_c($a[1],$b[2],$c0,$c1,$c2); - &mul_add_c($a[2],$b[1],$c0,$c1,$c2); - &mul_add_c($a[3],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[4],$c0,$c1,$c2); - &mul_add_c($a[1],$b[3],$c0,$c1,$c2); - &mul_add_c($a[2],$b[2],$c0,$c1,$c2); - &mul_add_c($a[3],$b[1],$c0,$c1,$c2); - &mul_add_c($a[4],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[5],$c0,$c1,$c2); - &mul_add_c($a[1],$b[4],$c0,$c1,$c2); - &mul_add_c($a[2],$b[3],$c0,$c1,$c2); - &mul_add_c($a[3],$b[2],$c0,$c1,$c2); - &mul_add_c($a[4],$b[1],$c0,$c1,$c2); - &mul_add_c($a[5],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[6],$c0,$c1,$c2); - &mul_add_c($a[1],$b[5],$c0,$c1,$c2); - &mul_add_c($a[2],$b[4],$c0,$c1,$c2); - &mul_add_c($a[3],$b[3],$c0,$c1,$c2); - &mul_add_c($a[4],$b[2],$c0,$c1,$c2); - &mul_add_c($a[5],$b[1],$c0,$c1,$c2); - &mul_add_c($a[6],$b[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]); - &mul_add_c($a[1],$b[6],$c0,$c1,$c2); - &mul_add_c($a[2],$b[5],$c0,$c1,$c2); - &mul_add_c($a[3],$b[4],$c0,$c1,$c2); - &mul_add_c($a[4],$b[3],$c0,$c1,$c2); - &mul_add_c($a[5],$b[2],$c0,$c1,$c2); - &mul_add_c($a[6],$b[1],$c0,$c1,$c2); - &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]); - &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]); - &mul_add_c($a[2],$b[6],$c0,$c1,$c2); - &mul_add_c($a[3],$b[5],$c0,$c1,$c2); - &mul_add_c($a[4],$b[4],$c0,$c1,$c2); - &mul_add_c($a[5],$b[3],$c0,$c1,$c2); - &mul_add_c($a[6],$b[2],$c0,$c1,$c2); - &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]); - &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]); - &mul_add_c($a[3],$b[6],$c0,$c1,$c2); - &mul_add_c($a[4],$b[5],$c0,$c1,$c2); - &mul_add_c($a[5],$b[4],$c0,$c1,$c2); - &mul_add_c($a[6],$b[3],$c0,$c1,$c2); - &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]); - &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]); - &mul_add_c($a[4],$b[6],$c0,$c1,$c2); - &mul_add_c($a[5],$b[5],$c0,$c1,$c2); - &mul_add_c($a[6],$b[4],$c0,$c1,$c2); - &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]); - &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]); - &mul_add_c($a[5],$b[6],$c0,$c1,$c2); - &mul_add_c($a[6],$b[5],$c0,$c1,$c2); - &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]); - &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]); - &mul_add_c($a[6],$b[6],$c0,$c1,$c2); - &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]); - &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]); - &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]); - &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &FR($c0,$c1,$c2); - - &ld($reg_s0,&swtmp(0)); - &ld($reg_s1,&swtmp(1)); - &stack_pop(2); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/sqr.pl b/crypto/bn/asm/alpha/sqr.pl deleted file mode 100644 index a55b696..0000000 --- a/crypto/bn/asm/alpha/sqr.pl +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_words - { - local($name)=@_; - local($cc,$a,$b,$r,$couny); - - &init_pool(3); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $count=&wparam(2); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &br(&label("finish")); - &blt($count,&label("finish")); - - ($a0,$r0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($r0,&QWPw(0,$rp)); - -$a=<<'EOF'; -########################################################## - &set_label("loop"); - - ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap)); - ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp)); - ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap)); - ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp)); - ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap)); - ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp)); - - ($o0,$t0)=&NR(2); - &add($a0,$b0,$o0); - &cmpult($o0,$b0,$t0); - &add($o0,$cc,$o0); - &cmpult($o0,$cc,$cc); - &add($cc,$t0,$cc); &FR($t0); - - ($t1,$o1)=&NR(2); - - &add($a1,$b1,$o1); &FR($a1); - &cmpult($o1,$b1,$t1); &FR($b1); - &add($o1,$cc,$o1); - &cmpult($o1,$cc,$cc); - &add($cc,$t1,$cc); &FR($t1); - - ($t2,$o2)=&NR(2); - - &add($a2,$b2,$o2); &FR($a2); - &cmpult($o2,$b2,$t2); &FR($b2); - &add($o2,$cc,$o2); - &cmpult($o2,$cc,$cc); - &add($cc,$t2,$cc); &FR($t2); - - ($t3,$o3)=&NR(2); - - &add($a3,$b3,$o3); &FR($a3); - &cmpult($o3,$b3,$t3); &FR($b3); - &add($o3,$cc,$o3); - &cmpult($o3,$cc,$cc); - &add($cc,$t3,$cc); &FR($t3); - - &st($o0,&QWPw(0,$rp)); &FR($o0); - &st($o1,&QWPw(0,$rp)); &FR($o1); - &st($o2,&QWPw(0,$rp)); &FR($o2); - &st($o3,&QWPw(0,$rp)); &FR($o3); - - &sub($count,4,$count); # count-=4 - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -EOF -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a - &mul($a0,$a0,($l0)=&NR(1)); - &add($ap,$QWS,$ap); - &add($rp,2*$QWS,$rp); - &sub($count,1,$count); - &muh($a0,$a0,($h0)=&NR(1)); &FR($a0); - &st($l0,&QWPw(-2,$rp)); &FR($l0); - &st($h0,&QWPw(-1,$rp)); &FR($h0); - - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/sqr_c4.pl b/crypto/bn/asm/alpha/sqr_c4.pl deleted file mode 100644 index bf33f5b..0000000 --- a/crypto/bn/asm/alpha/sqr_c4.pl +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub sqr_add_c - { - local($a,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$a,($l1)=&NR(1)); - &muh($a,$a,($h1)=&NR(1)); - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1); - &add($c1,$t1,$c1); &FR($t1); - &add($c2,$t2,$c2); &FR($t2); - } - -sub sqr_add_c2 - { - local($a,$b,$c0,$c1,$c2)=@_; - local($l1,$h1,$t1,$t2); - - &mul($a,$b,($l1)=&NR(1)); - &muh($a,$b,($h1)=&NR(1)); - &cmplt($l1,"zero",($lc1)=&NR(1)); - &cmplt($h1,"zero",($hc1)=&NR(1)); - &add($l1,$l1,$l1); - &add($h1,$h1,$h1); - &add($h1,$lc1,$h1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - - &add($c0,$l1,$c0); - &add($c1,$h1,$c1); - &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1); - &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1); - - &add($c1,$lc1,$c1); &FR($lc1); - &add($c2,$hc1,$c2); &FR($hc1); - } - - -sub bn_sqr_comba4 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - &st($c1,&QWPw(7,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/sqr_c8.pl b/crypto/bn/asm/alpha/sqr_c8.pl deleted file mode 100644 index b4afe08..0000000 --- a/crypto/bn/asm/alpha/sqr_c8.pl +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sqr_comba8 - { - local($name)=@_; - local(@a,@b,$r,$c0,$c1,$c2); - - $cnt=1; - &init_pool(2); - - $rp=&wparam(0); - $ap=&wparam(1); - - &function_begin($name,""); - - &comment(""); - - &ld(($a[0])=&NR(1),&QWPw(0,$ap)); - &ld(($a[1])=&NR(1),&QWPw(1,$ap)); - &ld(($a[2])=&NR(1),&QWPw(2,$ap)); - &ld(($a[3])=&NR(1),&QWPw(3,$ap)); - &ld(($a[4])=&NR(1),&QWPw(4,$ap)); - &ld(($a[5])=&NR(1),&QWPw(5,$ap)); - &ld(($a[6])=&NR(1),&QWPw(6,$ap)); - &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap); - - ($c0,$c1,$c2)=&NR(3); - - &mov("zero",$c2); - &mul($a[0],$a[0],$c0); - &muh($a[0],$a[0],$c1); - &st($c0,&QWPw(0,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(1,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[1],$c0,$c1,$c2); - &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(2,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(3,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[2],$c0,$c1,$c2); - &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(4,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(5,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[3],$c0,$c1,$c2); - &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(6,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2); - &st($c0,&QWPw(7,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[4],$c0,$c1,$c2); - &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2); - &st($c0,&QWPw(8,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2); - &st($c0,&QWPw(9,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[5],$c0,$c1,$c2); - &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2); - &st($c0,&QWPw(10,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2); - &st($c0,&QWPw(11,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[6],$c0,$c1,$c2); - &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2); - &st($c0,&QWPw(12,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2); - &st($c0,&QWPw(13,$rp)); - ($c0,$c1,$c2)=($c1,$c2,$c0); - &mov("zero",$c2); - - &sqr_add_c($a[7],$c0,$c1,$c2); - &st($c0,&QWPw(14,$rp)); - &st($c1,&QWPw(15,$rp)); - - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/alpha/sub.pl b/crypto/bn/asm/alpha/sub.pl deleted file mode 100644 index d998da5..0000000 --- a/crypto/bn/asm/alpha/sub.pl +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/local/bin/perl -# alpha assember - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - &init_pool(4); - ($cc)=GR("r0"); - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - ($a0,$b0)=&NR(2); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8); - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$o0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp); - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$o1); # will we borrow? - &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1); - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($o0,&QWPw(0,$rp)); &FR($o0); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$o2); # will we borrow? - &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2); - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($o1,&QWPw(1,$rp)); &FR($o1); - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$o3); # will we borrow? - &add($b3,$t3,$cc); &FR($t3,$a3,$b3); - - &st($o2,&QWPw(2,$rp)); &FR($o2); - &sub($count,4,$count); # count-=4 - &st($o3,&QWPw(3,$rp)); &FR($o3); - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &FR($a0,$b0); - &set_label("end"); - &function_end($name); - - &fin_pool; - } - -1; diff --git a/crypto/bn/asm/bn-alpha.pl b/crypto/bn/asm/bn-alpha.pl deleted file mode 100644 index 302edf2..0000000 --- a/crypto/bn/asm/bn-alpha.pl +++ /dev/null @@ -1,571 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -$d=&data(); -$d =~ s/CC/0/g; -$d =~ s/R1/1/g; -$d =~ s/R2/2/g; -$d =~ s/R3/3/g; -$d =~ s/R4/4/g; -$d =~ s/L1/5/g; -$d =~ s/L2/6/g; -$d =~ s/L3/7/g; -$d =~ s/L4/8/g; -$d =~ s/O1/22/g; -$d =~ s/O2/23/g; -$d =~ s/O3/24/g; -$d =~ s/O4/25/g; -$d =~ s/A1/20/g; -$d =~ s/A2/21/g; -$d =~ s/A3/27/g; -$d =~ s/A4/28/g; -if (0){ -} - -print $d; - -sub data - { - local($data)=<<'EOF'; - - # DEC Alpha assember - # The bn_div_words is actually gcc output but the other parts are hand done. - # Thanks to tzeruch@ceddec.com for sending me the gcc output for - # bn_div_words. - # I've gone back and re-done most of routines. - # The key thing to remeber for the 164 CPU is that while a - # multiply operation takes 8 cycles, another one can only be issued - # after 4 cycles have elapsed. I've done modification to help - # improve this. Also, normally, a ld instruction will not be available - # for about 3 cycles. - .file 1 "bn_asm.c" - .set noat -gcc2_compiled.: -__gnu_compiled_c: - .text - .align 3 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: -bn_mul_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - .align 3 -$42: - mulq $A1,$19,$L1 # 1 2 1 ###### - ldq $A2,8($17) # 2 1 - ldq $R2,8($16) # 2 1 - umulh $A1,$19,$A1 # 1 2 ###### - ldq $A3,16($17) # 3 1 - ldq $R3,16($16) # 3 1 - mulq $A2,$19,$L2 # 2 2 1 ###### - ldq $A4,24($17) # 4 1 - addq $R1,$L1,$R1 # 1 2 2 - ldq $R4,24($16) # 4 1 - umulh $A2,$19,$A2 # 2 2 ###### - cmpult $R1,$L1,$O1 # 1 2 3 1 - addq $A1,$O1,$A1 # 1 3 1 - addq $R1,$CC,$R1 # 1 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ###### - cmpult $R1,$CC,$CC # 1 2 3 2 - addq $R2,$L2,$R2 # 2 2 2 - addq $A1,$CC,$CC # 1 3 2 - cmpult $R2,$L2,$O2 # 2 2 3 1 - addq $A2,$O2,$A2 # 2 3 1 - umulh $A3,$19,$A3 # 3 2 ###### - addq $R2,$CC,$R2 # 2 2 3 1 - cmpult $R2,$CC,$CC # 2 2 3 2 - subq $18,4,$18 - mulq $A4,$19,$L4 # 4 2 1 ###### - addq $A2,$CC,$CC # 2 3 2 - addq $R3,$L3,$R3 # 3 2 2 - addq $16,32,$16 - cmpult $R3,$L3,$O3 # 3 2 3 1 - stq $R1,-32($16) # 1 2 4 - umulh $A4,$19,$A4 # 4 2 ###### - addq $A3,$O3,$A3 # 3 3 1 - addq $R3,$CC,$R3 # 3 2 3 1 - stq $R2,-24($16) # 2 2 4 - cmpult $R3,$CC,$CC # 3 2 3 2 - stq $R3,-16($16) # 3 2 4 - addq $R4,$L4,$R4 # 4 2 2 - addq $A3,$CC,$CC # 3 3 2 - cmpult $R4,$L4,$O4 # 4 2 3 1 - addq $17,32,$17 - addq $A4,$O4,$A4 # 4 3 1 - addq $R4,$CC,$R4 # 4 2 3 1 - cmpult $R4,$CC,$CC # 4 2 3 2 - stq $R4,-8($16) # 4 2 4 - addq $A4,$CC,$CC # 4 3 2 - blt $18,$43 - - ldq $A1,0($17) # 1 1 - ldq $R1,0($16) # 1 1 - - br $42 - - .align 4 -$45: - ldq $A1,0($17) # 4 1 - ldq $R1,0($16) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - addq $16,8,$16 - addq $17,8,$17 - umulh $A1,$19,$A1 # 4 2 - addq $R1,$L1,$R1 # 4 2 2 - cmpult $R1,$L1,$O1 # 4 2 3 1 - addq $A1,$O1,$A1 # 4 3 1 - addq $R1,$CC,$R1 # 4 2 3 1 - cmpult $R1,$CC,$CC # 4 2 3 2 - addq $A1,$CC,$CC # 4 3 2 - stq $R1,-8($16) # 4 2 4 - bgt $18,$45 - ret $31,($26),1 # else exit - - .align 4 -$43: - addq $18,4,$18 - bgt $18,$45 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_add_words - .align 3 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: -bn_mul_words..ng: - .frame $30,0,$26,0 - .prologue 0 - .align 5 - subq $18,4,$18 - bis $31,$31,$CC - blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$142: - - mulq $A1,$19,$L1 # 1 2 1 ##### - ldq $A2,8($17) # 2 1 - ldq $A3,16($17) # 3 1 - umulh $A1,$19,$A1 # 1 2 ##### - ldq $A4,24($17) # 4 1 - mulq $A2,$19,$L2 # 2 2 1 ##### - addq $L1,$CC,$L1 # 1 2 3 1 - subq $18,4,$18 - cmpult $L1,$CC,$CC # 1 2 3 2 - umulh $A2,$19,$A2 # 2 2 ##### - addq $A1,$CC,$CC # 1 3 2 - addq $17,32,$17 - addq $L2,$CC,$L2 # 2 2 3 1 - mulq $A3,$19,$L3 # 3 2 1 ##### - cmpult $L2,$CC,$CC # 2 2 3 2 - addq $A2,$CC,$CC # 2 3 2 - addq $16,32,$16 - umulh $A3,$19,$A3 # 3 2 ##### - stq $L1,-32($16) # 1 2 4 - mulq $A4,$19,$L4 # 4 2 1 ##### - addq $L3,$CC,$L3 # 3 2 3 1 - stq $L2,-24($16) # 2 2 4 - cmpult $L3,$CC,$CC # 3 2 3 2 - umulh $A4,$19,$A4 # 4 2 ##### - addq $A3,$CC,$CC # 3 3 2 - stq $L3,-16($16) # 3 2 4 - addq $L4,$CC,$L4 # 4 2 3 1 - cmpult $L4,$CC,$CC # 4 2 3 2 - - addq $A4,$CC,$CC # 4 3 2 - - stq $L4,-8($16) # 4 2 4 - - blt $18,$143 - - ldq $A1,0($17) # 1 1 - - br $142 - - .align 4 -$145: - ldq $A1,0($17) # 4 1 - mulq $A1,$19,$L1 # 4 2 1 - subq $18,1,$18 - umulh $A1,$19,$A1 # 4 2 - addq $L1,$CC,$L1 # 4 2 3 1 - addq $16,8,$16 - cmpult $L1,$CC,$CC # 4 2 3 2 - addq $17,8,$17 - addq $A1,$CC,$CC # 4 3 2 - stq $L1,-8($16) # 4 2 4 - - bgt $18,$145 - ret $31,($26),1 # else exit - - .align 4 -$143: - addq $18,4,$18 - bgt $18,$145 # goto tail code - ret $31,($26),1 # else exit - - .end bn_mul_words - .align 3 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: -bn_sqr_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $18,4,$18 - blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code - ldq $A1,0($17) # 1 1 - .align 3 -$542: - mulq $A1,$A1,$L1 ###### - ldq $A2,8($17) # 1 1 - subq $18,4 - umulh $A1,$A1,$R1 ###### - ldq $A3,16($17) # 1 1 - mulq $A2,$A2,$L2 ###### - ldq $A4,24($17) # 1 1 - stq $L1,0($16) # r[0] - umulh $A2,$A2,$R2 ###### - stq $R1,8($16) # r[1] - mulq $A3,$A3,$L3 ###### - stq $L2,16($16) # r[0] - umulh $A3,$A3,$R3 ###### - stq $R2,24($16) # r[1] - mulq $A4,$A4,$L4 ###### - stq $L3,32($16) # r[0] - umulh $A4,$A4,$R4 ###### - stq $R3,40($16) # r[1] - - addq $16,64,$16 - addq $17,32,$17 - stq $L4,-16($16) # r[0] - stq $R4,-8($16) # r[1] - - blt $18,$543 - ldq $A1,0($17) # 1 1 - br $542 - -$442: - ldq $A1,0($17) # a[0] - mulq $A1,$A1,$L1 # a[0]*w low part r2 - addq $16,16,$16 - addq $17,8,$17 - subq $18,1,$18 - umulh $A1,$A1,$R1 # a[0]*w high part r3 - stq $L1,-16($16) # r[0] - stq $R1,-8($16) # r[1] - - bgt $18,$442 - ret $31,($26),1 # else exit - - .align 4 -$543: - addq $18,4,$18 - bgt $18,$442 # goto tail code - ret $31,($26),1 # else exit - .end bn_sqr_words - - .align 3 - .globl bn_add_words - .ent bn_add_words -bn_add_words: -bn_add_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$901: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? - stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$900 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $901 - .align 4 -$945: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - addq $R1,$L1,$R1 # r=a+b; - subq $19,1,$19 # loop-- - addq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - cmpult $R1,$L1,$O1 # did we overflow? - cmpult $R1,$CC,$CC # overflow? - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $CC,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$945 - ret $31,($26),1 # else exit - -$900: - addq $19,4,$19 - bgt $19,$945 # goto tail code - ret $31,($26),1 # else exit - .end bn_add_words - - .align 3 - .globl bn_sub_words - .ent bn_sub_words -bn_sub_words: -bn_sub_words..ng: - .frame $30,0,$26,0 - .prologue 0 - - subq $19,4,$19 - bis $31,$31,$CC # carry = 0 - br $800 - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - .align 3 -$801: - addq $R1,$L1,$R1 # r=a+b; - ldq $L2,8($17) # a[1] - cmpult $R1,$L1,$O1 # did we overflow? - ldq $R2,8($18) # b[1] - addq $R1,$CC,$R1 # c+= overflow - ldq $L3,16($17) # a[2] - cmpult $R1,$CC,$CC # overflow? - ldq $R3,16($18) # b[2] - addq $CC,$O1,$CC - ldq $L4,24($17) # a[3] - addq $R2,$L2,$R2 # r=a+b; - ldq $R4,24($18) # b[3] - cmpult $R2,$L2,$O2 # did we overflow? - addq $R3,$L3,$R3 # r=a+b; - addq $R2,$CC,$R2 # c+= overflow - cmpult $R3,$L3,$O3 # did we overflow? - cmpult $R2,$CC,$CC # overflow? - addq $R4,$L4,$R4 # r=a+b; - addq $CC,$O2,$CC - cmpult $R4,$L4,$O4 # did we overflow? - addq $R3,$CC,$R3 # c+= overflow - stq $R1,0($16) # r[0]=c - cmpult $R3,$CC,$CC # overflow? - stq $R2,8($16) # r[1]=c - addq $CC,$O3,$CC - stq $R3,16($16) # r[2]=c - addq $R4,$CC,$R4 # c+= overflow - subq $19,4,$19 # loop-- - cmpult $R4,$CC,$CC # overflow? - addq $17,32,$17 # a++ - addq $CC,$O4,$CC - stq $R4,24($16) # r[3]=c - addq $18,32,$18 # b++ - addq $16,32,$16 # r++ - - blt $19,$800 - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - br $801 - .align 4 -$845: - ldq $L1,0($17) # a[0] - ldq $R1,0($18) # b[1] - cmpult $L1,$R1,$O1 # will we borrow? - subq $L1,$R1,$R1 # r=a-b; - subq $19,1,$19 # loop-- - cmpult $R1,$CC,$O2 # will we borrow? - subq $R1,$CC,$R1 # c+= overflow - addq $17,8,$17 # a++ - addq $18,8,$18 # b++ - stq $R1,0($16) # r[0]=c - addq $O2,$O1,$CC - addq $16,8,$16 # r++ - - bgt $19,$845 - ret $31,($26),1 # else exit - -$800: - addq $19,4,$19 - bgt $19,$845 # goto tail code - ret $31,($26),1 # else exit - .end bn_sub_words - - # - # What follows was taken directly from the C compiler with a few - # hacks to redo the lables. - # -.text - .align 3 - .globl bn_div_words - .ent bn_div_words -bn_div_words: - ldgp $29,0($27) -bn_div_words..ng: - lda $30,-48($30) - .frame $30,48,$26,0 - stq $26,0($30) - stq $9,8($30) - stq $10,16($30) - stq $11,24($30) - stq $12,32($30) - stq $13,40($30) - .mask 0x4003e00,-48 - .prologue 1 - bis $16,$16,$9 - bis $17,$17,$10 - bis $18,$18,$11 - bis $31,$31,$13 - bis $31,2,$12 - bne $11,$119 - lda $0,-1 - br $31,$136 - .align 4 -$119: - bis $11,$11,$16 - jsr $26,BN_num_bits_word - ldgp $29,0($26) - subq $0,64,$1 - beq $1,$120 - bis $31,1,$1 - sll $1,$0,$1 - cmpule $9,$1,$1 - bne $1,$120 - # lda $16,_IO_stderr_ - # lda $17,$C32 - # bis $0,$0,$18 - # jsr $26,fprintf - # ldgp $29,0($26) - jsr $26,abort - ldgp $29,0($26) - .align 4 -$120: - bis $31,64,$3 - cmpult $9,$11,$2 - subq $3,$0,$1 - addl $1,$31,$0 - subq $9,$11,$1 - cmoveq $2,$1,$9 - beq $0,$122 - zapnot $0,15,$2 - subq $3,$0,$1 - sll $11,$2,$11 - sll $9,$2,$3 - srl $10,$1,$1 - sll $10,$2,$10 - bis $3,$1,$9 -$122: - srl $11,32,$5 - zapnot $11,15,$6 - lda $7,-1 - .align 5 -$123: - srl $9,32,$1 - subq $1,$5,$1 - bne $1,$126 - zapnot $7,15,$27 - br $31,$127 - .align 4 -$126: - bis $9,$9,$24 - bis $5,$5,$25 - divqu $24,$25,$27 -$127: - srl $10,32,$4 - .align 5 -$128: - mulq $27,$5,$1 - subq $9,$1,$3 - zapnot $3,240,$1 - bne $1,$129 - mulq $6,$27,$2 - sll $3,32,$1 - addq $1,$4,$1 - cmpule $2,$1,$2 - bne $2,$129 - subq $27,1,$27 - br $31,$128 - .align 4 -$129: - mulq $27,$6,$1 - mulq $27,$5,$4 - srl $1,32,$3 - sll $1,32,$1 - addq $4,$3,$4 - cmpult $10,$1,$2 - subq $10,$1,$10 - addq $2,$4,$2 - cmpult $9,$2,$1 - bis $2,$2,$4 - beq $1,$134 - addq $9,$11,$9 - subq $27,1,$27 -$134: - subl $12,1,$12 - subq $9,$4,$9 - beq $12,$124 - sll $27,32,$13 - sll $9,32,$2 - srl $10,32,$1 - sll $10,32,$10 - bis $2,$1,$9 - br $31,$123 - .align 4 -$124: - bis $13,$27,$0 -$136: - ldq $26,0($30) - ldq $9,8($30) - ldq $10,16($30) - ldq $11,24($30) - ldq $12,32($30) - ldq $13,40($30) - addq $30,48,$30 - ret $31,($26),1 - .end bn_div_words -EOF - return($data); - } - diff --git a/crypto/bn/asm/ca.pl b/crypto/bn/asm/ca.pl deleted file mode 100644 index c1ce67a..0000000 --- a/crypto/bn/asm/ca.pl +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; -require "alpha/mul_add.pl"; -require "alpha/mul.pl"; -require "alpha/sqr.pl"; -require "alpha/add.pl"; -require "alpha/sub.pl"; -require "alpha/mul_c8.pl"; -require "alpha/mul_c4.pl"; -require "alpha/sqr_c4.pl"; -require "alpha/sqr_c8.pl"; -require "alpha/div.pl"; - -&asm_init($ARGV[0],$0); - -&bn_mul_words("bn_mul_words"); -&bn_sqr_words("bn_sqr_words"); -&bn_mul_add_words("bn_mul_add_words"); -&bn_add_words("bn_add_words"); -&bn_sub_words("bn_sub_words"); -&bn_div_words("bn_div_words"); -&bn_mul_comba8("bn_mul_comba8"); -&bn_mul_comba4("bn_mul_comba4"); -&bn_sqr_comba4("bn_sqr_comba4"); -&bn_sqr_comba8("bn_sqr_comba8"); - -&asm_finish(); - diff --git a/crypto/bn/asm/co-alpha.pl b/crypto/bn/asm/co-alpha.pl deleted file mode 100644 index 67dad3e..0000000 --- a/crypto/bn/asm/co-alpha.pl +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/local/bin/perl -# I have this in perl so I can use more usefull register names and then convert -# them into alpha registers. -# - -push(@INC,"perlasm","../../perlasm"); -require "alpha.pl"; - -&asm_init($ARGV[0],$0); - -print &bn_sub_words("bn_sub_words"); - -&asm_finish(); - -sub bn_sub_words - { - local($name)=@_; - local($cc,$a,$b,$r); - - $cc="r0"; - $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13"; - $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14"; - $a2="r3"; $b2="r7"; $r2="r11"; - $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15"; - - $rp=&wparam(0); - $ap=&wparam(1); - $bp=&wparam(2); - $count=&wparam(3); - - &function_begin($name,""); - - &comment(""); - &sub($count,4,$count); - &mov("zero",$cc); - &blt($count,&label("finish")); - - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - -########################################################## - &set_label("loop"); - - &ld($a1,&QWPw(1,$ap)); - &cmpult($a0,$b0,$tmp); # will we borrow? - &ld($b1,&QWPw(1,$bp)); - &sub($a0,$b0,$a0); # do the subtract - &ld($a2,&QWPw(2,$ap)); - &cmpult($a0,$cc,$b0); # will we borrow? - &ld($b2,&QWPw(2,$bp)); - &sub($a0,$cc,$a0); # will we borrow? - &ld($a3,&QWPw(3,$ap)); - &add($b0,$tmp,$cc); # add the borrows - - &cmpult($a1,$b1,$t1); # will we borrow? - &sub($a1,$b1,$a1); # do the subtract - &ld($b3,&QWPw(3,$bp)); - &cmpult($a1,$cc,$b1); # will we borrow? - &sub($a1,$cc,$a1); # will we borrow? - &add($b1,$t1,$cc); # add the borrows - - &cmpult($a2,$b2,$tmp); # will we borrow? - &sub($a2,$b2,$a2); # do the subtract - &st($a0,&QWPw(0,$rp)); # save - &cmpult($a2,$cc,$b2); # will we borrow? - &sub($a2,$cc,$a2); # will we borrow? - &add($b2,$tmp,$cc); # add the borrows - - &cmpult($a3,$b3,$t3); # will we borrow? - &sub($a3,$b3,$a3); # do the subtract - &st($a1,&QWPw(1,$rp)); # save - &cmpult($a3,$cc,$b3); # will we borrow? - &sub($a3,$cc,$a3); # will we borrow? - &add($b3,$t3,$cc); # add the borrows - - &st($a2,&QWPw(2,$rp)); # save - &sub($count,4,$count); # count-=4 - &st($a3,&QWPw(3,$rp)); # save - &add($ap,4*$QWS,$ap); # count+=4 - &add($bp,4*$QWS,$bp); # count+=4 - &add($rp,4*$QWS,$rp); # count+=4 - - &blt($count,&label("finish")); - &ld($a0,&QWPw(0,$ap)); - &ld($b0,&QWPw(0,$bp)); - &br(&label("loop")); -################################################## - # Do the last 0..3 words - - &set_label("last_loop"); - - &ld($a0,&QWPw(0,$ap)); # get a - &ld($b0,&QWPw(0,$bp)); # get b - &cmpult($a0,$b0,$tmp); # will we borrow? - &sub($a0,$b0,$a0); # do the subtract - &cmpult($a0,$cc,$b0); # will we borrow? - &sub($a0,$cc,$a0); # will we borrow? - &st($a0,&QWPw(0,$rp)); # save - &add($b0,$tmp,$cc); # add the borrows - - &add($ap,$QWS,$ap); - &add($bp,$QWS,$bp); - &add($rp,$QWS,$rp); - &sub($count,1,$count); - &bgt($count,&label("last_loop")); - &function_end_A($name); - -###################################################### - &set_label("finish"); - &add($count,4,$count); - &bgt($count,&label("last_loop")); - - &set_label("end"); - &function_end($name); - } - diff --git a/crypto/bn/asm/mips1.s b/crypto/bn/asm/mips1.s deleted file mode 100644 index 44fa125..0000000 --- a/crypto/bn/asm/mips1.s +++ /dev/null @@ -1,539 +0,0 @@ -/* This assember is for R2000/R3000 machines, or higher ones that do - * no want to do any 64 bit arithmatic. - * Make sure that the SSLeay bignum library is compiled with - * THIRTY_TWO_BIT set. - * This must either be compiled with the system CC, or, if you use GNU gas, - * cc -E mips1.s|gas -o mips1.o - */ - .set reorder - .set noat - -#define R1 $1 -#define CC $2 -#define R2 $3 -#define R3 $8 -#define R4 $9 -#define L1 $10 -#define L2 $11 -#define L3 $12 -#define L4 $13 -#define H1 $14 -#define H2 $15 -#define H3 $24 -#define H4 $25 - -#define P1 $4 -#define P2 $5 -#define P3 $6 -#define P4 $7 - - .align 2 - .ent bn_mul_add_words - .globl bn_mul_add_words -.text -bn_mul_add_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - #blt P3,4,$lab34 - - subu R1,P3,4 - move CC,$0 - bltz R1,$lab34 -$lab2: - lw R1,0(P1) - lw L1,0(P2) - lw R2,4(P1) - lw L2,4(P2) - lw R3,8(P1) - lw L3,8(P2) - lw R4,12(P1) - lw L4,12(P2) - multu L1,P4 - addu R1,R1,CC - mflo L1 - sltu CC,R1,CC - addu R1,R1,L1 - mfhi H1 - sltu L1,R1,L1 - sw R1,0(P1) - addu CC,CC,L1 - multu L2,P4 - addu CC,H1,CC - mflo L2 - addu R2,R2,CC - sltu CC,R2,CC - mfhi H2 - addu R2,R2,L2 - addu P2,P2,16 - sltu L2,R2,L2 - sw R2,4(P1) - addu CC,CC,L2 - multu L3,P4 - addu CC,H2,CC - mflo L3 - addu R3,R3,CC - sltu CC,R3,CC - mfhi H3 - addu R3,R3,L3 - addu P1,P1,16 - sltu L3,R3,L3 - sw R3,-8(P1) - addu CC,CC,L3 - multu L4,P4 - addu CC,H3,CC - mflo L4 - addu R4,R4,CC - sltu CC,R4,CC - mfhi H4 - addu R4,R4,L4 - subu P3,P3,4 - sltu L4,R4,L4 - addu CC,CC,L4 - addu CC,H4,CC - - subu R1,P3,4 - sw R4,-4(P1) # delay slot - bgez R1,$lab2 - - bleu P3,0,$lab3 - .align 2 -$lab33: - lw L1,0(P2) - lw R1,0(P1) - multu L1,P4 - addu R1,R1,CC - sltu CC,R1,CC - addu P1,P1,4 - mflo L1 - mfhi H1 - addu R1,R1,L1 - addu P2,P2,4 - sltu L1,R1,L1 - subu P3,P3,1 - addu CC,CC,L1 - sw R1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab33 - j $31 - .align 2 -$lab3: - j $31 - .align 2 -$lab34: - bgt P3,0,$lab33 - j $31 - .end bn_mul_add_words - - .align 2 - # Program Unit: bn_mul_words - .ent bn_mul_words - .globl bn_mul_words -.text -bn_mul_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - move CC,$0 - bltz P3,$lab45 -$lab44: - lw L1,0(P2) - lw L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - multu L1,P4 - subu P3,P3,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - multu L2,P4 - sltu CC,L1,CC - sw L1,0(P1) - addu CC,H1,CC - mflo L2 - mfhi H2 - addu L2,L2,CC - multu L3,P4 - sltu CC,L2,CC - sw L2,4(P1) - addu CC,H2,CC - mflo L3 - mfhi H3 - addu L3,L3,CC - multu L4,P4 - sltu CC,L3,CC - sw L3,8(P1) - addu CC,H3,CC - mflo L4 - mfhi H4 - addu L4,L4,CC - addu P1,P1,16 - sltu CC,L4,CC - addu P2,P2,16 - addu CC,H4,CC - sw L4,-4(P1) - - bgez P3,$lab44 - b $lab45 -$lab46: - lw L1,0(P2) - addu P1,P1,4 - multu L1,P4 - addu P2,P2,4 - mflo L1 - mfhi H1 - addu L1,L1,CC - subu P3,P3,1 - sltu CC,L1,CC - sw L1,-4(P1) - addu CC,H1,CC - bgtz P3,$lab46 - j $31 -$lab45: - addu P3,P3,4 - bgtz P3,$lab46 - j $31 - .align 2 - .end bn_mul_words - - # Program Unit: bn_sqr_words - .ent bn_sqr_words - .globl bn_sqr_words -.text -bn_sqr_words: - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P3,P3,4 - bltz P3,$lab55 -$lab54: - lw L1,0(P2) - lw L2,4(P2) - lw L3,8(P2) - lw L4,12(P2) - - multu L1,L1 - subu P3,P3,4 - mflo L1 - mfhi H1 - sw L1,0(P1) - sw H1,4(P1) - - multu L2,L2 - addu P1,P1,32 - mflo L2 - mfhi H2 - sw L2,-24(P1) - sw H2,-20(P1) - - multu L3,L3 - addu P2,P2,16 - mflo L3 - mfhi H3 - sw L3,-16(P1) - sw H3,-12(P1) - - multu L4,L4 - - mflo L4 - mfhi H4 - sw L4,-8(P1) - sw H4,-4(P1) - - bgtz P3,$lab54 - b $lab55 -$lab56: - lw L1,0(P2) - addu P1,P1,8 - multu L1,L1 - addu P2,P2,4 - subu P3,P3,1 - mflo L1 - mfhi H1 - sw L1,-8(P1) - sw H1,-4(P1) - - bgtz P3,$lab56 - j $31 -$lab55: - addu P3,P3,4 - bgtz P3,$lab56 - j $31 - .align 2 - .end bn_sqr_words - - # Program Unit: bn_add_words - .ent bn_add_words - .globl bn_add_words -.text -bn_add_words: # 0x590 - .frame $sp,0,$31 - .mask 0x00000000,0 - .fmask 0x00000000,0 - - subu P4,P4,4 - move CC,$0 - bltz P4,$lab65 -$lab64: - lw L1,0(P2) - lw R1,0(P3) - lw L2,4(P2) - lw R2,4(P3) - - addu L1,L1,CC - lw L3,8(P2) - sltu CC,L1,CC - addu L1,L1,R1 - sltu R1,L1,R1 - lw R3,8(P3) - addu CC,CC,R1 - lw L4,12(P2) - - addu L2,L2,CC - lw R4,12(P3) - sltu CC,L2,CC - addu L2,L2,R2 - sltu R2,L2,R2 - sw L1,0(P1) - addu CC,CC,R2 - addu P1,P1,16 - addu L3,L3,CC - sw L2,-12(P1) - - sltu CC,L3,CC - addu L3,L3,R3 - sltu R3,L3,R3 - addu P2,P2,16 - addu CC,CC,R3 - - addu L4,L4,CC - addu P3,P3,16 - sltu CC,L4,CC - addu L4,L4,R4 - subu P4,P4,4 - sltu R4,L4,R4 - sw L3,-8(P1) - addu CC,CC,R4 - sw L4,-4(P1) - - bgtz P4,$lab64 - b $lab65 -$lab66: - lw L1,0(P2) - lw R1,0(P3) - addu L1,L1,CC - addu P1,P1,4 - sltu CC,L1,CC - addu P2,P2,4 - addu P3,P3,4 - addu L1,L1,R1 - subu P4,P4,1 - sltu R1,L1,R1 - sw L1,-4(P1) - addu CC,CC,R1 - - bgtz P4,$lab66 - j $31 -$lab65: - addu P4,P4,4 - bgtz P4,$lab66 - j $31 - .end bn_add_words - - # Program Unit: bn_div64 - .set at - .set reorder - .text - .align 2 - .globl bn_div64 - # 321 { - .ent bn_div64 2 -bn_div64: - subu $sp, 64 - sw $31, 56($sp) - sw $16, 48($sp) - .mask 0x80010000, -56 - .frame $sp, 64, $31 - move $9, $4 - move $12, $5 - move $16, $6 - # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t; - move $31, $0 - # 323 int i,count=2; - li $13, 2 - # 324 - # 325 if (d == 0) return(BN_MASK2); - bne $16, 0, $80 - li $2, -1 - b $93 -$80: - # 326 - # 327 i=BN_num_bits_word(d); - move $4, $16 - sw $31, 16($sp) - sw $9, 24($sp) - sw $12, 32($sp) - sw $13, 40($sp) - .livereg 0x800ff0e,0xfff - jal BN_num_bits_word - li $4, 32 - lw $31, 16($sp) - lw $9, 24($sp) - lw $12, 32($sp) - lw $13, 40($sp) - move $3, $2 - # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i)) - beq $2, $4, $81 - li $14, 1 - sll $15, $14, $2 - bleu $9, $15, $81 - # 329 { - # 330 #if !defined(NO_STDIO) && !defined(WIN16) - # 331 fprintf(stderr,"Division would overflow (%d)\n",i); - # 332 #endif - # 333 abort(); - sw $3, 8($sp) - sw $9, 24($sp) - sw $12, 32($sp) - sw $13, 40($sp) - sw $31, 26($sp) - .livereg 0xff0e,0xfff - jal abort - lw $3, 8($sp) - li $4, 32 - lw $9, 24($sp) - lw $12, 32($sp) - lw $13, 40($sp) - lw $31, 26($sp) - # 334 } -$81: - # 335 i=BN_BITS2-i; - subu $3, $4, $3 - # 336 if (h >= d) h-=d; - bltu $9, $16, $82 - subu $9, $9, $16 -$82: - # 337 - # 338 if (i) - beq $3, 0, $83 - # 339 { - # 340 d<<=i; - sll $16, $16, $3 - # 341 h=(h<<i)|(l>>(BN_BITS2-i)); - sll $24, $9, $3 - subu $25, $4, $3 - srl $14, $12, $25 - or $9, $24, $14 - # 342 l<<=i; - sll $12, $12, $3 - # 343 } -$83: - # 344 dh=(d&BN_MASK2h)>>BN_BITS4; - # 345 dl=(d&BN_MASK2l); - and $8, $16, -65536 - srl $8, $8, 16 - and $10, $16, 65535 - li $6, -65536 -$84: - # 346 for (;;) - # 347 { - # 348 if ((h>>BN_BITS4) == dh) - srl $15, $9, 16 - bne $8, $15, $85 - # 349 q=BN_MASK2l; - li $5, 65535 - b $86 -$85: - # 350 else - # 351 q=h/dh; - divu $5, $9, $8 -$86: - # 352 - # 353 for (;;) - # 354 { - # 355 t=(h-q*dh); - mul $4, $5, $8 - subu $2, $9, $4 - move $3, $2 - # 356 if ((t&BN_MASK2h) || - # 357 ((dl*q) <= ( - # 358 (t<<BN_BITS4)+ - # 359 ((l&BN_MASK2h)>>BN_BITS4)))) - and $25, $2, $6 - bne $25, $0, $87 - mul $24, $10, $5 - sll $14, $3, 16 - and $15, $12, $6 - srl $25, $15, 16 - addu $15, $14, $25 - bgtu $24, $15, $88 -$87: - # 360 break; - mul $3, $10, $5 - b $89 -$88: - # 361 q--; - addu $5, $5, -1 - # 362 } - b $86 -$89: - # 363 th=q*dh; - # 364 tl=q*dl; - # 365 t=(tl>>BN_BITS4); - # 366 tl=(tl<<BN_BITS4)&BN_MASK2h; - sll $14, $3, 16 - and $2, $14, $6 - move $11, $2 - # 367 th+=t; - srl $25, $3, 16 - addu $7, $4, $25 - # 368 - # 369 if (l < tl) th++; - bgeu $12, $2, $90 - addu $7, $7, 1 -$90: - # 370 l-=tl; - subu $12, $12, $11 - # 371 if (h < th) - bgeu $9, $7, $91 - # 372 { - # 373 h+=d; - addu $9, $9, $16 - # 374 q--; - addu $5, $5, -1 - # 375 } -$91: - # 376 h-=th; - subu $9, $9, $7 - # 377 - # 378 if (--count == 0) break; - addu $13, $13, -1 - beq $13, 0, $92 - # 379 - # 380 ret=q<<BN_BITS4; - sll $31, $5, 16 - # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2; - sll $24, $9, 16 - srl $15, $12, 16 - or $9, $24, $15 - # 382 l=(l&BN_MASK2l)<<BN_BITS4; - and $12, $12, 65535 - sll $12, $12, 16 - # 383 } - b $84 -$92: - # 384 ret|=q; - or $31, $31, $5 - # 385 return(ret); - move $2, $31 -$93: - lw $16, 48($sp) - lw $31, 56($sp) - addu $sp, 64 - j $31 - .end bn_div64 - diff --git a/crypto/bn/asm/pa-risc.s b/crypto/bn/asm/pa-risc.s deleted file mode 100644 index 775130a..0000000 --- a/crypto/bn/asm/pa-risc.s +++ /dev/null @@ -1,710 +0,0 @@ - .SPACE $PRIVATE$ - .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31 - .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82 - .SPACE $TEXT$ - .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 - .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY - .IMPORT $global$,DATA - .IMPORT $$dyncall,MILLICODE -; gcc_compiled.: - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_add_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r25),%r29 - ldo 12(%r26),%r23 - fldws -16(0,%r30),%fr11L -L$0002 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0005 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r26),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r26) - ldw -8(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0010 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -8(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-8(0,%r23) - ldw -4(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0015 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw -4(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,-4(0,%r23) - ldw 0(0,%r29),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0020 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi 1,%r19,%r19 - ldw 0(0,%r23),%r28 - addl %r20,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0003 - stw %r20,0(0,%r23) - ldo 16(%r29),%r29 - ldo 16(%r25),%r25 - ldo 16(%r23),%r23 - bl L$0002,0 - ldo 16(%r26),%r26 -L$0003 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_words - .PROC - .CALLINFO FRAME=0,CALLS,SAVE_RP - .ENTRY - stw %r2,-20(0,%r30) - ldi 0,%r28 - extru %r23,31,16,%r2 - stw %r2,-16(0,%r30) - extru %r23,15,16,%r23 - ldil L'65536,%r31 - fldws -16(0,%r30),%fr11R - stw %r23,-16(0,%r30) - ldo 12(%r26),%r29 - ldo 12(%r25),%r23 - fldws -16(0,%r30),%fr11L -L$0026 - ldw 0(0,%r25),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0029 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r26) - ldw -8(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0033 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-8(0,%r29) - ldw -4(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0037 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,-4(0,%r29) - ldw 0(0,%r23),%r19 - extru %r19,31,16,%r20 - stw %r20,-16(0,%r30) - extru %r19,15,16,%r19 - fldws -16(0,%r30),%fr22L - stw %r19,-16(0,%r30) - xmpyu %fr22L,%fr11R,%fr8 - fldws -16(0,%r30),%fr22L - fstws %fr8R,-16(0,%r30) - xmpyu %fr11R,%fr22L,%fr10 - ldw -16(0,%r30),%r2 - stw %r20,-16(0,%r30) - xmpyu %fr22L,%fr11L,%fr9 - fldws -16(0,%r30),%fr22L - fstws %fr10R,-16(0,%r30) - copy %r2,%r22 - ldw -16(0,%r30),%r2 - fstws %fr9R,-16(0,%r30) - xmpyu %fr11L,%fr22L,%fr8 - copy %r2,%r19 - ldw -16(0,%r30),%r2 - fstws %fr8R,-16(0,%r30) - copy %r2,%r20 - ldw -16(0,%r30),%r2 - addl %r2,%r19,%r21 - comclr,<<= %r19,%r21,0 - addl %r20,%r31,%r20 -L$0041 - extru %r21,15,16,%r19 - addl %r20,%r19,%r20 - zdep %r21,15,16,%r19 - addl %r22,%r19,%r22 - comclr,<<= %r19,%r22,0 - addi,tr 1,%r20,%r19 - copy %r20,%r19 - addl %r22,%r28,%r20 - comclr,<<= %r28,%r20,0 - addi,tr 1,%r19,%r28 - copy %r19,%r28 - addib,= -1,%r24,L$0027 - stw %r20,0(0,%r29) - ldo 16(%r23),%r23 - ldo 16(%r25),%r25 - ldo 16(%r29),%r29 - bl L$0026,0 - ldo 16(%r26),%r26 -L$0027 - ldw -20(0,%r30),%r2 - bv,n 0(%r2) - .EXIT - .PROCEND - .align 4 - .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR -bn_sqr_words - .PROC - .CALLINFO FRAME=0,NO_CALLS - .ENTRY - ldo 28(%r26),%r23 - ldo 12(%r25),%r28 -L$0046 - ldw 0(0,%r25),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,0(0,%r26) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-24(0,%r23) - ldw -8(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-20(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-16(0,%r23) - ldw -4(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-12(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,-8(0,%r23) - ldw 0(0,%r28),%r21 - extru %r21,31,16,%r22 - stw %r22,-16(0,%r30) - extru %r21,15,16,%r21 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r29 - stw %r22,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r21,-16(0,%r30) - copy %r29,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10L - stw %r21,-16(0,%r30) - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - extru %r19,16,17,%r20 - zdep %r19,14,15,%r19 - ldw -16(0,%r30),%r29 - xmpyu %fr10L,%fr10R,%fr9 - addl %r29,%r19,%r22 - stw %r22,-4(0,%r23) - fstws %fr9R,-16(0,%r30) - ldw -16(0,%r30),%r29 - addl %r29,%r20,%r21 - comclr,<<= %r19,%r22,0 - addi 1,%r21,%r21 - addib,= -1,%r24,L$0057 - stw %r21,0(0,%r23) - ldo 16(%r28),%r28 - ldo 16(%r25),%r25 - ldo 32(%r23),%r23 - bl L$0046,0 - ldo 32(%r26),%r26 -L$0057 - bv,n 0(%r2) - .EXIT - .PROCEND - .IMPORT BN_num_bits_word,CODE - .IMPORT fprintf,CODE - .IMPORT __iob,DATA - .SPACE $TEXT$ - .SUBSPA $LIT$ - - .align 4 -L$C0000 - .STRING "Division would overflow\x0a\x00" - .IMPORT abort,CODE - .SPACE $TEXT$ - .SUBSPA $CODE$ - - .align 4 - .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR -bn_div64 - .PROC - .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8 - .ENTRY - stw %r2,-20(0,%r30) - stwm %r8,128(0,%r30) - stw %r7,-124(0,%r30) - stw %r4,-112(0,%r30) - stw %r3,-108(0,%r30) - copy %r26,%r3 - copy %r25,%r4 - stw %r6,-120(0,%r30) - ldi 0,%r7 - stw %r5,-116(0,%r30) - movb,<> %r24,%r5,L$0059 - ldi 2,%r6 - bl L$0076,0 - ldi -1,%r28 -L$0059 - .CALL ARGW0=GR - bl BN_num_bits_word,%r2 - copy %r5,%r26 - ldi 32,%r19 - comb,= %r19,%r28,L$0060 - subi 31,%r28,%r19 - mtsar %r19 - zvdepi 1,32,%r19 - comb,>>= %r19,%r3,L$0060 - addil LR'__iob-$global$+32,%r27 - ldo RR'__iob-$global$+32(%r1),%r26 - ldil LR'L$C0000,%r25 - .CALL ARGW0=GR,ARGW1=GR - bl fprintf,%r2 - ldo RR'L$C0000(%r25),%r25 - .CALL - bl abort,%r2 - nop -L$0060 - comb,>> %r5,%r3,L$0061 - subi 32,%r28,%r28 - sub %r3,%r5,%r3 -L$0061 - comib,= 0,%r28,L$0062 - subi 31,%r28,%r19 - mtsar %r19 - zvdep %r5,32,%r5 - zvdep %r3,32,%r21 - subi 32,%r28,%r20 - mtsar %r20 - vshd 0,%r4,%r20 - or %r21,%r20,%r3 - mtsar %r19 - zvdep %r4,32,%r4 -L$0062 - extru %r5,15,16,%r23 - extru %r5,31,16,%r28 -L$0063 - extru %r3,15,16,%r19 - comb,<> %r23,%r19,L$0066 - copy %r3,%r26 - bl L$0067,0 - zdepi -1,31,16,%r29 -L$0066 - .IMPORT $$divU,MILLICODE - bl $$divU,%r31 - copy %r23,%r25 -L$0067 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - stw %r23,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr8 - fldws -16(0,%r30),%fr10R - fstws %fr8R,-16(0,%r30) - xmpyu %fr10L,%fr10R,%fr9 - ldw -16(0,%r30),%r8 - fstws %fr9R,-16(0,%r30) - copy %r8,%r22 - ldw -16(0,%r30),%r8 - extru %r4,15,16,%r24 - copy %r8,%r21 -L$0068 - sub %r3,%r21,%r20 - copy %r20,%r19 - depi 0,31,16,%r19 - comib,<> 0,%r19,L$0069 - zdep %r20,15,16,%r19 - addl %r19,%r24,%r19 - comb,>>= %r19,%r22,L$0069 - sub %r22,%r28,%r22 - sub %r21,%r23,%r21 - bl L$0068,0 - ldo -1(%r29),%r29 -L$0069 - stw %r29,-16(0,%r30) - fldws -16(0,%r30),%fr10L - stw %r28,-16(0,%r30) - fldws -16(0,%r30),%fr10R - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - ldw -16(0,%r30),%r8 - stw %r23,-16(0,%r30) - fldws -16(0,%r30),%fr10R - copy %r8,%r19 - xmpyu %fr10L,%fr10R,%fr8 - fstws %fr8R,-16(0,%r30) - extru %r19,15,16,%r20 - ldw -16(0,%r30),%r8 - zdep %r19,15,16,%r19 - addl %r8,%r20,%r20 - comclr,<<= %r19,%r4,0 - addi 1,%r20,%r20 - comb,<<= %r20,%r3,L$0074 - sub %r4,%r19,%r4 - addl %r3,%r5,%r3 - ldo -1(%r29),%r29 -L$0074 - addib,= -1,%r6,L$0064 - sub %r3,%r20,%r3 - zdep %r29,15,16,%r7 - shd %r3,%r4,16,%r3 - bl L$0063,0 - zdep %r4,15,16,%r4 -L$0064 - or %r7,%r29,%r28 -L$0076 - ldw -148(0,%r30),%r2 - ldw -124(0,%r30),%r7 - ldw -120(0,%r30),%r6 - ldw -116(0,%r30),%r5 - ldw -112(0,%r30),%r4 - ldw -108(0,%r30),%r3 - bv 0(%r2) - ldwm -128(0,%r30),%r8 - .EXIT - .PROCEND diff --git a/crypto/bn/asm/r3000.s b/crypto/bn/asm/r3000.s deleted file mode 100644 index e95269a..0000000 --- a/crypto/bn/asm/r3000.s +++ /dev/null @@ -1,646 +0,0 @@ - .file 1 "../bn_mulw.c" - .set nobopt - .option pic2 - - # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C - - # Cc1 defaults: - # -mabicalls - - # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1): - # -quiet -dumpbase -O2 -o - -gcc2_compiled.: -__gnu_compiled_c: - .rdata - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20 - .byte 0x24,0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24 - .byte 0x0 - - .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f - .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24 - .byte 0x0 - .text - .align 2 - .globl bn_mul_add_words - .ent bn_mul_add_words -bn_mul_add_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $12,$4 - move $14,$5 - move $9,$6 - move $13,$7 - move $8,$0 - addu $10,$12,12 - addu $11,$14,12 -$L2: - lw $6,0($14) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($12) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($12) - .set macro - .set reorder - - lw $6,-8($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-8($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-8($10) - .set macro - .set reorder - - lw $6,-4($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,-4($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,-4($10) - .set macro - .set reorder - - lw $6,0($11) - #nop - multu $13,$6 - mfhi $6 - mflo $7 - #nop - move $5,$8 - move $4,$0 - lw $3,0($10) - addu $9,$9,-1 - move $2,$0 - addu $7,$7,$3 - sltu $8,$7,$3 - addu $6,$6,$2 - addu $6,$6,$8 - addu $7,$7,$5 - sltu $2,$7,$5 - addu $6,$6,$4 - addu $6,$6,$2 - srl $3,$6,0 - move $2,$0 - move $8,$3 - .set noreorder - .set nomacro - beq $9,$0,$L3 - sw $7,0($10) - .set macro - .set reorder - - addu $11,$11,16 - addu $14,$14,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L2 - addu $12,$12,16 - .set macro - .set reorder - -$L3: - .set noreorder - .set nomacro - j $31 - move $2,$8 - .set macro - .set reorder - - .end bn_mul_add_words - .align 2 - .globl bn_mul_words - .ent bn_mul_words -bn_mul_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $11,$4 - move $12,$5 - move $8,$6 - move $6,$0 - addu $10,$11,12 - addu $9,$12,12 -$L10: - lw $4,0($12) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($11) - .set macro - .set reorder - - lw $4,-8($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-8($10) - .set macro - .set reorder - - lw $4,-4($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,-4($10) - .set macro - .set reorder - - lw $4,0($9) - #nop - multu $7,$4 - mfhi $4 - mflo $5 - #nop - move $3,$6 - move $2,$0 - addu $8,$8,-1 - addu $5,$5,$3 - sltu $6,$5,$3 - addu $4,$4,$2 - addu $4,$4,$6 - srl $3,$4,0 - move $2,$0 - move $6,$3 - .set noreorder - .set nomacro - beq $8,$0,$L11 - sw $5,0($10) - .set macro - .set reorder - - addu $9,$9,16 - addu $12,$12,16 - addu $10,$10,16 - .set noreorder - .set nomacro - j $L10 - addu $11,$11,16 - .set macro - .set reorder - -$L11: - .set noreorder - .set nomacro - j $31 - move $2,$6 - .set macro - .set reorder - - .end bn_mul_words - .align 2 - .globl bn_sqr_words - .ent bn_sqr_words -bn_sqr_words: - .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 - .mask 0x00000000,0 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - move $9,$4 - addu $7,$9,28 - addu $8,$5,12 -$L18: - lw $2,0($5) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,0($9) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-24($7) - .set macro - .set reorder - - lw $2,-8($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-20($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-16($7) - .set macro - .set reorder - - lw $2,-4($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-12($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,-8($7) - .set macro - .set reorder - - lw $2,0($8) - #nop - multu $2,$2 - mfhi $2 - mflo $3 - #nop - addu $6,$6,-1 - sw $3,-4($7) - srl $3,$2,0 - move $2,$0 - .set noreorder - .set nomacro - beq $6,$0,$L19 - sw $3,0($7) - .set macro - .set reorder - - addu $8,$8,16 - addu $5,$5,16 - addu $7,$7,32 - .set noreorder - .set nomacro - j $L18 - addu $9,$9,32 - .set macro - .set reorder - -$L19: - j $31 - .end bn_sqr_words - .rdata - .align 2 -$LC0: - - .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e - .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f - .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa - .byte 0x0 - .text - .align 2 - .globl bn_div64 - .ent bn_div64 -bn_div64: - .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8 - .mask 0x901f0000,-8 - .fmask 0x00000000,0 - .set noreorder - .cpload $25 - .set reorder - subu $sp,$sp,56 - .cprestore 16 - sw $16,24($sp) - move $16,$4 - sw $17,28($sp) - move $17,$5 - sw $18,32($sp) - move $18,$6 - sw $20,40($sp) - move $20,$0 - sw $19,36($sp) - li $19,0x00000002 # 2 - sw $31,48($sp) - .set noreorder - .set nomacro - bne $18,$0,$L26 - sw $28,44($sp) - .set macro - .set reorder - - .set noreorder - .set nomacro - j $L43 - li $2,-1 # 0xffffffff - .set macro - .set reorder - -$L26: - move $4,$18 - jal BN_num_bits_word - move $4,$2 - li $2,0x00000020 # 32 - .set noreorder - .set nomacro - beq $4,$2,$L27 - li $2,0x00000001 # 1 - .set macro - .set reorder - - sll $2,$2,$4 - sltu $2,$2,$16 - .set noreorder - .set nomacro - beq $2,$0,$L44 - li $5,0x00000020 # 32 - .set macro - .set reorder - - la $4,__iob+32 - la $5,$LC0 - jal fprintf - jal abort -$L27: - li $5,0x00000020 # 32 -$L44: - sltu $2,$16,$18 - .set noreorder - .set nomacro - bne $2,$0,$L28 - subu $4,$5,$4 - .set macro - .set reorder - - subu $16,$16,$18 -$L28: - .set noreorder - .set nomacro - beq $4,$0,$L29 - li $10,-65536 # 0xffff0000 - .set macro - .set reorder - - sll $18,$18,$4 - sll $3,$16,$4 - subu $2,$5,$4 - srl $2,$17,$2 - or $16,$3,$2 - sll $17,$17,$4 -$L29: - srl $7,$18,16 - andi $9,$18,0xffff -$L30: - srl $2,$16,16 - .set noreorder - .set nomacro - beq $2,$7,$L34 - li $6,0x0000ffff # 65535 - .set macro - .set reorder - - divu $6,$16,$7 -$L34: - mult $6,$9 - mflo $5 - #nop - #nop - mult $6,$7 - and $2,$17,$10 - srl $8,$2,16 - mflo $4 -$L35: - subu $3,$16,$4 - and $2,$3,$10 - .set noreorder - .set nomacro - bne $2,$0,$L36 - sll $2,$3,16 - .set macro - .set reorder - - addu $2,$2,$8 - sltu $2,$2,$5 - .set noreorder - .set nomacro - beq $2,$0,$L36 - subu $5,$5,$9 - .set macro - .set reorder - - subu $4,$4,$7 - .set noreorder - .set nomacro - j $L35 - addu $6,$6,-1 - .set macro - .set reorder - -$L36: - mult $6,$7 - mflo $5 - #nop - #nop - mult $6,$9 - mflo $4 - #nop - #nop - srl $3,$4,16 - sll $2,$4,16 - and $4,$2,$10 - sltu $2,$17,$4 - .set noreorder - .set nomacro - beq $2,$0,$L40 - addu $5,$5,$3 - .set macro - .set reorder - - addu $5,$5,1 -$L40: - sltu $2,$16,$5 - .set noreorder - .set nomacro - beq $2,$0,$L41 - subu $17,$17,$4 - .set macro - .set reorder - - addu $16,$16,$18 - addu $6,$6,-1 -$L41: - addu $19,$19,-1 - .set noreorder - .set nomacro - beq $19,$0,$L31 - subu $16,$16,$5 - .set macro - .set reorder - - sll $20,$6,16 - sll $3,$16,16 - srl $2,$17,16 - or $16,$3,$2 - .set noreorder - .set nomacro - j $L30 - sll $17,$17,16 - .set macro - .set reorder - -$L31: - or $2,$20,$6 -$L43: - lw $31,48($sp) - lw $20,40($sp) - lw $19,36($sp) - lw $18,32($sp) - lw $17,28($sp) - lw $16,24($sp) - addu $sp,$sp,56 - j $31 - .end bn_div64 - - .globl abort .text - .globl fprintf .text - .globl BN_num_bits_word .text |