diff options
Diffstat (limited to 'arch/m68k/ifpsp060/src/ilsp.S')
-rw-r--r-- | arch/m68k/ifpsp060/src/ilsp.S | 932 |
1 files changed, 932 insertions, 0 deletions
diff --git a/arch/m68k/ifpsp060/src/ilsp.S b/arch/m68k/ifpsp060/src/ilsp.S new file mode 100644 index 0000000..afa7422 --- /dev/null +++ b/arch/m68k/ifpsp060/src/ilsp.S @@ -0,0 +1,932 @@ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +M68000 Hi-Performance Microprocessor Division +M68060 Software Package +Production Release P1.00 -- October 10, 1994 + +M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. + +THE SOFTWARE is provided on an "AS IS" basis and without warranty. +To the maximum extent permitted by applicable law, +MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE +and any warranty against infringement with regard to the SOFTWARE +(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. + +To the maximum extent permitted by applicable law, +IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, +BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) +ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. +Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. + +You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE +so long as this entire notice is retained without alteration in any modified and/or +redistributed versions, and that such modified versions are clearly identified as such. +No licenses are granted by implication, estoppel or otherwise under any patents +or trademarks of Motorola, Inc. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# litop.s: +# This file is appended to the top of the 060FPLSP package +# and contains the entry points into the package. The user, in +# effect, branches to one of the branch table entries located here. 
#

# Entry-point branch table: one long branch per exported routine.
# NOTE(review): every bra.l is followed by a zero word, presumably to pad
# each entry to a fixed-size slot so callers can index the table - confirm
# the slot size against the package documentation before relying on it.
	bra.l		_060LSP__idivs64_
	short		0x0000
	bra.l		_060LSP__idivu64_
	short		0x0000

	bra.l		_060LSP__imuls64_
	short		0x0000
	bra.l		_060LSP__imulu64_
	short		0x0000

	bra.l		_060LSP__cmp2_Ab_
	short		0x0000
	bra.l		_060LSP__cmp2_Aw_
	short		0x0000
	bra.l		_060LSP__cmp2_Al_
	short		0x0000
	bra.l		_060LSP__cmp2_Db_
	short		0x0000
	bra.l		_060LSP__cmp2_Dw_
	short		0x0000
	bra.l		_060LSP__cmp2_Dl_
	short		0x0000

# leave room for future possible additions.
	align		0x200

#########################################################################
# XDEF ****************************************************************
#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.
#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.
#
#	This is the library version which is accessed as a subroutine
#	and therefore does not work exactly like the 680X0 div{s,u}.l
#	64-bit divide instruction.
#
# XREF ****************************************************************
#	None.
#
# INPUT ***************************************************************
#	0x4(sp)  = divisor
#	0x8(sp)  = hi(dividend)
#	0xc(sp)  = lo(dividend)
#	0x10(sp) = pointer to location to place quotient/remainder
#
# OUTPUT **************************************************************
#	0x10(sp) = points to location of remainder/quotient.
#		   remainder is in first longword, quotient is in 2nd.
#
# ALGORITHM ***********************************************************
#	If the operands are signed, make them unsigned and save the
# sign info for later. Separate out special cases like divide-by-zero
# or 32-bit divides if possible. Else, use a special math algorithm
# to calculate the result.
#	Restore sign info if signed instruction. Set the condition
# codes before performing the final "rts". If the divisor was equal to
# zero, then perform a divide-by-zero using a 16-bit implemented
# divide instruction.
# This way, the operating system can record that
# the event occurred even though it may not point to the correct place.
#
#########################################################################

# Stack-frame locals, addressed off %a6 after "link.w %a6,&-16".
# Once the frame is linked the arguments are read from 0x8(%a6) (divisor),
# 0xc(%a6) (hi dividend), 0x10(%a6) (lo dividend) and 0x14(%a6) (pointer
# to the result pair).
set POSNEG,		-1			# byte: 0xff = signed, 0x00 = unsigned
set NDIVISOR,		-2			# byte: set by slt if divisor was negative
set NDIVIDEND,		-3			# byte: set by slt if dividend was negative
set DDSECOND,		-4			# byte: set once 1st quotient word is done
set DDNORMAL,		-8			# long: count of normalization shifts
set DDQUOTIENT,		-12			# long: quotient, built one word at a time
set DIV64_CC,		-16			# word: condition codes saved at entry

##########
# divs.l #
##########
	global		_060LSP__idivs64_
_060LSP__idivs64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	st		POSNEG(%a6)		# signed operation
	bra.b		ldiv64_cont

##########
# divu.l #
##########
	global		_060LSP__idivu64_
_060LSP__idivu64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	sf		POSNEG(%a6)		# unsigned operation

# common body for the signed and unsigned entry points.
# register roles from here on: %d7 = divisor, %d5:%d6 = dividend (hi:lo);
# on exit %d5 = remainder and %d6 = quotient.
ldiv64_cont:
	mov.l		0x8(%a6),%d7		# fetch divisor

	beq.w		ldiv64eq0		# divisor is = 0!!!

	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

# separate signed and unsigned divide
	tst.b		POSNEG(%a6)		# signed or unsigned?
	beq.b		ldspecialcases		# use positive divide

# save the sign of the divisor
# make divisor unsigned if it's negative
	tst.l		%d7			# chk sign of divisor
	slt		NDIVISOR(%a6)		# save sign of divisor
	bpl.b		ldsgndividend
	neg.l		%d7			# complement negative divisor

# save the sign of the dividend
# make dividend unsigned if it's negative
ldsgndividend:
	tst.l		%d5			# chk sign of hi(dividend)
	slt		NDIVIDEND(%a6)		# save sign of dividend
	bpl.b		ldspecialcases

	mov.w		&0x0, %cc		# clear 'X' cc bit
	negx.l		%d6			# complement signed dividend
	negx.l		%d5

# extract some special cases:
#	- is (dividend == 0) ?
#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
ldspecialcases:
	tst.l		%d5			# is (hi(dividend) == 0)
	bne.b		ldnormaldivide		# no, so try it the long way

	tst.l		%d6			# is (lo(dividend) == 0), too
	beq.w		lddone			# yes, so (dividend == 0)

	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
	bls.b		ld32bitdivide		# yes, so use 32 bit divide

	exg		%d5,%d6			# q = 0, r = dividend
	bra.w		ldivfinish		# can't divide, we're done.

ld32bitdivide:
	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!

	bra.b		ldivfinish

ldnormaldivide:
# last special case:
#	- is hi(dividend) >= divisor ? if yes, then overflow
	cmp.l		%d7,%d5
	bls.b		lddovf			# answer won't fit in 32 bits

# perform the divide algorithm:
	bsr.l		ldclassical		# do int divide

# separate into signed and unsigned finishes.
ldivfinish:
	tst.b		POSNEG(%a6)		# do divs, divu separately
	beq.b		lddone			# divu has no processing!!!

# it was a divs.l, so ccode setting is a little more complicated...
	tst.b		NDIVIDEND(%a6)		# remainder has same sign
	beq.b		ldcc			# as dividend.
	neg.l		%d5			# sgn(rem) = sgn(dividend)
ldcc:
	mov.b		NDIVISOR(%a6), %d0
	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
	beq.b		ldqpos			# branch to quot positive

# 0x80000000 is the largest number representable as a 32-bit negative
# number. the negative of 0x80000000 is 0x80000000.
	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
	bhi.b		lddovf

	neg.l		%d6			# make (-quot) 2's comp

	bra.b		lddone

ldqpos:
	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
	bne.b		lddovf

lddone:
# if the register numbers are the same, only the quotient gets saved.
# so, if we always save the quotient second, we save ourselves a cmp&beq
# NOTE(review): the two lines above look inherited from the instruction-
# emulation variant; this library version has no register numbers and
# always stores both longwords at ldexit.
	andi.w		&0x10,DIV64_CC(%a6)	# keep only the caller's 'X' bit
	mov.w		DIV64_CC(%a6),%cc
	tst.l		%d6			# may set 'N' ccode bit
						# (tst on a zero quotient also sets 'Z')

# here, the remainder is in d5 and the quotient is in d6. they are stored,
# remainder first, at the location whose address was passed as the fourth
# argument (read through 0x14(%a6)).
# use movm here to not disturb the condition codes.
ldexit:
	movm.l		&0x0060,([0x14,%a6])	# save result (d5,d6)

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# overflow: the result should be the unchanged dividend, so reload it
# from the argument area and return it with 'V' set.
lddovf:
	mov.l		0xc(%a6), %d5		# get dividend hi
	mov.l		0x10(%a6), %d6		# get dividend lo

	andi.w		&0x1c,DIV64_CC(%a6)	# keep caller's 'X','N','Z' bits
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
	mov.w		DIV64_CC(%a6),%cc

	bra.b		ldexit

# divide by zero: copy the untouched dividend to the result location,
# restore the caller's condition codes and registers, then execute a
# 16-bit divide by zero so the exception is actually taken.
ldiv64eq0:
	mov.l		0xc(%a6),([0x14,%a6])
	mov.l		0x10(%a6),([0x14,%a6],0x4)

	mov.w		DIV64_CC(%a6),%cc

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	divu.w		&0x0,%d0		# force a divbyzero exception
	rts

###########################################################################
#########################################################################
# This routine uses the 'classical' Algorithm D from Donald Knuth's
# Art of Computer Programming, vol II, Seminumerical Algorithms.
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,
# where U,V are words of the quadword dividend and longword divisor,
# and U1, V1 are the most significant words.
#
# The most sig. longword of the 64 bit dividend must be in %d5, least
# in %d6. The divisor must be in %d7 (the code below reads it from
# there; no "ddivisor" or "ddusign" variable is referenced).
# The quotient is returned in %d6, remainder in %d5.
# The caller (ldnormaldivide) rejects the overflow case before calling,
# so this routine itself never reports overflow.
#########################################################################
ldclassical:
# if the divisor msw is 0, use simpler algorithm than the full blown
# one at lddknuth:

	cmpi.l		%d7, &0xffff
	bhi.b		lddknuth		# go use D. Knuth algorithm

# Since the divisor is only a word (and larger than the mslw of the dividend),
# a simpler algorithm may be used :
# In the general case, four quotient words would be created by
# dividing the divisor word into each dividend word. In this case,
# the first two quotient words must be zero, or overflow would occur.
# Since we already checked this case above, we can treat the most significant
# longword of the dividend as (0) remainder (see Knuth) and merely complete
# the last two divisions to get a quotient longword and word remainder:

	clr.l		%d1
	swap		%d5			# same as r*b if previous step rqd
	swap		%d6			# get u3 to lsw position
	mov.w		%d6, %d5		# rb + u3

	divu.w		%d7, %d5

	mov.w		%d5, %d1		# first quotient word
	swap		%d6			# get u4
	mov.w		%d6, %d5		# rb + u4

	divu.w		%d7, %d5

	swap		%d1
	mov.w		%d5, %d1		# 2nd quotient 'digit'
	clr.w		%d5
	swap		%d5			# now remainder
	mov.l		%d1, %d6		# and quotient

	rts

lddknuth:
# In this algorithm, the divisor is treated as a 2 digit (word) number
# which is divided into a 3 digit (word) dividend to get one quotient
# digit (word). After subtraction, the dividend is shifted and the
# process repeated. Before beginning, the divisor and dividend are
# 'normalized' (both shifted left by the same count) so that the process
# of estimating the quotient digit will yield verifiably correct results.

	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
	clr.l		%d1			# %d1 will hold trial quotient
lddnchk:
	btst		&31, %d7		# must we normalize? first word of
	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
	lsl.l		&0x1, %d7		# shift the divisor
	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
	roxl.l		&0x1, %d5		# shift u1,u2
	bra.w		lddnchk
lddnormalized:

# Now calculate an estimate of the quotient words (msw first, then lsw).
# The comments use subscripts for the first quotient digit determination.
	mov.l		%d7, %d3		# divisor
	mov.l		%d5, %d2		# dividend mslw
	swap		%d2
	swap		%d3
	cmp.w		%d2, %d3		# V1 = U1 ?
	bne.b		lddqcalc1
	mov.w		&0xffff, %d1		# use max trial quotient word
	bra.b		lddadj0
lddqcalc1:
	mov.l		%d5, %d1

	divu.w		%d3, %d1		# use quotient of mslw/msw

	andi.l		&0x0000ffff, %d1	# zero any remainder
lddadj0:

# now test the trial quotient and adjust. This step plus the
# normalization assures (according to Knuth) that the trial
# quotient will be at worst 1 too large.
	mov.l		%d6, -(%sp)		# save %d6; popped again at lddadjd1
	clr.w		%d6			# word u3 left
	swap		%d6			# in lsw position
lddadj1:	mov.l		%d7, %d3
	mov.l		%d1, %d2
	mulu.w		%d7, %d2		# V2q
	swap		%d3
	mulu.w		%d1, %d3		# V1q
	mov.l		%d5, %d4		# U1U2
	sub.l		%d3, %d4		# U1U2 - V1q

	swap		%d4

	mov.w		%d4,%d0
	mov.w		%d6,%d4			# insert lower word (U3)

	tst.w		%d0			# is upper word set?
	bne.w		lddadjd1

#	add.l		%d6, %d4		# (U1U2 - V1q) + U3

	cmp.l		%d2, %d4
	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
	subq.l		&0x1, %d1		# yes, decrement and recheck
	bra.b		lddadj1
lddadjd1:
# now test the word by multiplying it by the divisor (V1V2) and comparing
# the 3 digit (word) result with the current dividend words
	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
	mov.l		%d1, %d6
	swap		%d6			# shift answer to ms 3 words
	mov.l		%d7, %d5
	bsr.l		ldmm2
	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
	mov.l		%d6, %d3
	mov.l		(%sp)+, %d5		# restore dividend
	mov.l		(%sp)+, %d6
	sub.l		%d3, %d6
	subx.l		%d2, %d5		# subtract double precision
	bcc		ldd2nd			# no carry, do next quotient digit
	subq.l		&0x1, %d1		# q is one too large
# need to add back divisor longword to current ms 3 digits of dividend
# - according to Knuth, this is done only 2 out of 65536 times for random
# divisor, dividend selection.
	clr.l		%d2
	mov.l		%d7, %d3
	swap		%d3
	clr.w		%d3			# %d3 now ls word of divisor
	add.l		%d3, %d6		# aligned with 3rd word of dividend
	addx.l		%d2, %d5
	mov.l		%d7, %d3
	clr.w		%d3			# %d3 now ms word of divisor
	swap		%d3			# aligned with 2nd word of dividend
	add.l		%d3, %d5
ldd2nd:
	tst.b		DDSECOND(%a6)		# both q words done?
	bne.b		lddremain
# first quotient digit now correct. store digit and shift the
# (subtracted) dividend
	mov.w		%d1, DDQUOTIENT(%a6)
	clr.l		%d1
	swap		%d5
	swap		%d6
	mov.w		%d6, %d5
	clr.w		%d6
	st		DDSECOND(%a6)		# second digit
	bra.w		lddnormalized
lddremain:
# add 2nd word to quotient, get the remainder.
	mov.w		%d1, DDQUOTIENT+2(%a6)
# shift down one word/digit to renormalize remainder.
	mov.w		%d5, %d6
	swap		%d6
	swap		%d5
	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
	beq.b		lddrn
	subq.l		&0x1, %d7		# set for loop count
lddnlp:
	lsr.l		&0x1, %d5		# shift into %d6
	roxr.l		&0x1, %d6
	dbf		%d7, lddnlp
lddrn:
	mov.l		%d6, %d5		# remainder
	mov.l		DDQUOTIENT(%a6), %d6	# quotient

	rts
ldmm2:
# factors for the 32X32->64 multiplication are in %d5 and %d6.
# returns 64 bit result in %d5 (hi) %d6(lo).
# destroys %d2,%d3,%d4.
# NOTE(review): the two carry fix-ups below use word-sized addx.w, which
# only updates the low word of the msw*msw product in %d3. The one call
# site in this file passes %d6 = (trial quotient << 16), whose low word is
# zero, so both add.w steps appear unable to carry - confirm before
# reusing this helper with arbitrary factors.

# multiply hi,lo words of each factor to get 4 intermediate products
	mov.l		%d6, %d2
	mov.l		%d6, %d3
	mov.l		%d5, %d4
	swap		%d3
	swap		%d4
	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
	mulu.w		%d4, %d3		# %d3 <- msw*msw
# now use swap and addx to consolidate to two longwords
	clr.l		%d4
	swap		%d6
	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
	addx.w		%d4, %d3		# add any carry to m*m product
	add.w		%d2, %d6		# add in lsw of other m*l product
	addx.w		%d4, %d3		# add any carry to m*m product
	swap		%d6			# %d6 is low 32 bits of final product
	clr.w		%d5
	clr.w		%d2			# lsw of two mixed products used,
	swap		%d5			# now use msws of longwords
	swap		%d2
	add.l		%d2, %d5
	add.l		%d3, %d5		# %d5 now ms 32 bits of final product
	rts

#########################################################################
# XDEF ****************************************************************
#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction
#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.
#
#	This is the library version which is accessed as a subroutine
#	and therefore does not work exactly like the 680X0 mul{s,u}.l
#	64-bit multiply instruction.
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	0x4(sp) = multiplier
#	0x8(sp) = multiplicand
#	0xc(sp) = pointer to location to place 64-bit result
#
# OUTPUT **************************************************************
#	0xc(sp) = points to location of 64-bit result
#
# ALGORITHM ***********************************************************
#	Perform the multiply in pieces using 16x16->32 unsigned
# multiplies and "add" instructions.
#	Set the condition codes as appropriate before performing an
# "rts".
#
#########################################################################

# Stack-frame local, addressed off %a6 after "link.w %a6,&-4".
# Once the frame is linked the arguments are read from 0x8(%a6)
# (multiplier), 0xc(%a6) (multiplicand) and 0x10(%a6) (result pointer).
set MUL64_CC,		-4			# word: condition codes saved at entry

##########
# mulu.l #
##########
	global		_060LSP__imulu64_
_060LSP__imulu64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		mulu64_zero		# handle zero separately

#########################################################################
#	63			   32				0
#	----------------------------
#	| hi(mplier) * hi(mplicand)|
#	----------------------------
#		     -----------------------------
#		     | hi(mplier) * lo(mplicand) |
#		     -----------------------------
#		     -----------------------------
#		     | lo(mplier) * hi(mplicand) |
#		     -----------------------------
#	  |			   -----------------------------
#	--|--			   | lo(mplier) * lo(mplicand) |
#	  |			   -----------------------------
#	========================================================
#	--------------------------------------------------------
#	|	hi(result)	   |	    lo(result)         |
#	--------------------------------------------------------
#########################################################################
mulu64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

# now, grab the condition codes. only one that can be set is 'N'.
# 'N' CAN be set even though the operation is unsigned, if bit 63 is set.
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# here, hi(result) is in d1 and lo(result) is in d0. swap them so that
# d0 = hi and d1 = lo, then store d0,d1 at the location whose address was
# passed as the third argument (read through 0x10(%a6)).
# use exg/movm here to not disturb the condition codes.
mulu64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result (d0,d1)

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store a zero result and set the 'Z' ccode bit (old 'X' is preserved).
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		mulu64_end

##########
# muls.l #
##########
# Signed variant: both operands are made positive, the same unsigned
# 16x16 partial-product scheme is applied, and the 64-bit product is
# negated at the end when exactly one operand was negative. %d5 carries
# the sign tag, which is why this routine saves d2-d5 rather than d2-d4.
	global		_060LSP__imuls64_
_060LSP__imuls64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3c00,-(%sp)		# save d2-d5
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

# the zero cases must leave through this routine's own epilogue
# (muls64_zero -> muls64_end), which restores d2-d5 to match the prologue.
# they used to branch to mulu64_zero, whose epilogue pops only d2-d4; that
# happened to work because %d5 is still untouched here and "unlk" resets
# %sp, but it left muls64_zero unreachable. the target is beyond short-
# branch range, hence the ".w" displacement.
	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		muls64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		muls64_zero		# handle zero separately

	clr.b		%d5			# clear sign tag
	tst.l		%d0			# is multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# make multiplier positive

	ori.b		&0x1,%d5		# save multiplier sgn

# the result sign is the exclusive or of the operand sign bits.
muls64_chk_md_sgn:
	tst.l		%d1			# is multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# make multiplicand positive

	eori.b		&0x1,%d5		# calculate correct sign

#########################################################################
#	63			   32				0
#	----------------------------
#	| hi(mplier) * hi(mplicand)|
#	----------------------------
#		     -----------------------------
#		     | hi(mplier) * lo(mplicand) |
#		     -----------------------------
#		     -----------------------------
#		     | lo(mplier) * hi(mplicand) |
#		     -----------------------------
#	  |			   -----------------------------
#	--|--			   | lo(mplier) * lo(mplicand) |
#	  |			   -----------------------------
#	========================================================
#	--------------------------------------------------------
#	|	hi(result)	   |	    lo(result)         |
#	--------------------------------------------------------
#########################################################################
muls64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# hi([2]) + hi([3])
	add.l		%d3,%d1			# ... + [4] = hi(result)

	tst.b		%d5			# should result be negative?
	beq.b		muls64_done		# no

# result should be a signed negative number.
# compute 2's complement of the unsigned number:
#   -negate all bits and add 1
# (d4 is still zero here, so the addx only propagates the carry.)
muls64_neg:
	not.l		%d0			# negate lo(result) bits
	not.l		%d1			# negate hi(result) bits
	addq.l		&1,%d0			# add 1 to lo(result)
	addx.l		%d4,%d1			# add carry to hi(result)

muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# here, hi(result) is in d1 and lo(result) is in d0. swap them so that
# d0 = hi and d1 = lo, then store d0,d1 at the location whose address was
# passed as the third argument (read through 0x10(%a6)).
# use exg/movm here to not disturb the condition codes.
muls64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result (d0,d1)

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# store a zero result and set the 'Z' ccode bit (old 'X' is preserved).
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		muls64_end

#########################################################################
# XDEF ****************************************************************
#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".
#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".
#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".
#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".
#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".
#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".
#
#	This is the library version which is accessed as a subroutine
#	and therefore does not work exactly like the 680X0 "cmp2"
#	instruction.
#
# XREF ****************************************************************
#	None
#
# INPUT ***************************************************************
#	0x4(sp) = Rn
#	0x8(sp) = pointer to boundary pair
#
# OUTPUT **************************************************************
#	cc = condition codes are set correctly
#
# ALGORITHM ***********************************************************
#	In the interest of simplicity, all operands are converted to
# longword size whether the operation is byte, word, or long. The
# bounds are sign extended accordingly. If Rn is a data register, Rn is
# also sign extended. If Rn is an address register, it need not be sign
# extended since the full register is always used.
#	The condition codes are set correctly before the final "rts".
#
#########################################################################

# Stack-frame local, addressed off %a6 after "link.w %a6,&-4".
# Once the frame is linked, Rn is read from 0x8(%a6) and the pointer to
# the bound pair from 0xc(%a6). Every entry point below loads
# %d2 = Rn, %d0 = lower bound, %d1 = upper bound and then joins l_cmp2_cmp.
set CMP2_CC,		-4			# word: condition codes saved at entry

# cmp2.b An: byte bounds are sign extended, Rn is used as a full longword.
	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

# cmp2.w An: word bounds are sign extended, Rn is used as a full longword.
	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

# cmp2.l An: longword bounds, nothing to extend.
	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1
	bra.w		l_cmp2_cmp		# go do the compare emulation

# cmp2.b Dn: byte bounds and the low byte of Rn are all sign extended.
	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend byte to long so we can do simple longword compares.
	extb.l		%d2			# sign extend data byte
	bra.w		l_cmp2_cmp		# go do the compare emulation

# cmp2.w Dn: word bounds and the low word of Rn are all sign extended.
	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend word to long so we can do simple longword compares.
	ext.l		%d2			# sign extend data word
	bra.w		l_cmp2_cmp		# go emulate compare

# cmp2.l Dn: longword bounds; falls straight through into l_cmp2_cmp.
	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6), %d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1

#
# To set the ccodes correctly:
#	(1) save 'Z' bit from (Rn - lo)
#	(2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
#	(3) keep 'X', 'N', and 'V' from before instruction
#	(4) combine ccodes
#
# ccr bit masks used below: X = 0x10, N = 0x08, Z = 0x04, V = 0x02, C = 0x01.
# on entry: %d2 = Rn, %d0 = lo bound, %d1 = hi bound (all longword sized).
#
l_cmp2_cmp:
	sub.l		%d0, %d2		# (Rn - lo)
	mov.w		%cc, %d3		# fetch resulting ccodes
	andi.b		&0x4, %d3		# keep 'Z' bit
	sub.l		%d0, %d1		# (hi - lo)
	cmp.l		%d1,%d2			# ((hi - lo) - (Rn - lo))

	mov.w		%cc, %d4		# fetch resulting ccodes
	or.b		%d4, %d3		# combine w/ earlier ccodes
	andi.b		&0x5, %d3		# keep 'Z' and 'C'

	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
	or.b		%d3, %d4		# insert new ccodes
	mov.w		%d4,%cc			# save new ccodes

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts