summaryrefslogtreecommitdiffstats
path: root/contrib/compiler-rt/lib/sparc64
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/compiler-rt/lib/sparc64')
-rw-r--r--contrib/compiler-rt/lib/sparc64/divmod.m4250
-rw-r--r--contrib/compiler-rt/lib/sparc64/divsi3.S333
-rw-r--r--contrib/compiler-rt/lib/sparc64/generate.sh6
-rw-r--r--contrib/compiler-rt/lib/sparc64/modsi3.S333
-rw-r--r--contrib/compiler-rt/lib/sparc64/udivsi3.S1
-rw-r--r--contrib/compiler-rt/lib/sparc64/umodsi3.S1
6 files changed, 924 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/sparc64/divmod.m4 b/contrib/compiler-rt/lib/sparc64/divmod.m4
new file mode 100644
index 0000000..6338199
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/divmod.m4
@@ -0,0 +1,250 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess: define(N, 4) define(TWOSUPN, 16)
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously: define(WORDSIZE, 32)
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ * define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/N )
+ * Note that this is log_base_(2ˆN) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*N-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+
+define(dividend, `%o0')
+define(divisor,`%o1')
+define(Q, `%o2')
+define(R, `%o3')
+define(ITER, `%o4')
+define(V, `%o5')
+define(SIGN, `%g3')
+define(T, `%g1')
+define(SC,`%g2')
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=N
+ * $2 -- the current accumulation of quotient bits
+ * N -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * R -- current remainder
+ * Q -- current quotient
+ * V -- current comparand
+ * cc -- set on current value of R
+ * Dynamic output:
+ * R', Q', V', cc'
+ */
+
+#include "../assembly.h"
+
+.text
+ .align 4
+
+define(DEVELOP_QUOTIENT_BITS,
+` !depth $1, accumulated bits $2
+ bl L.$1.eval(TWOSUPN+$2)
+ srl V,1,V
+ ! remainder is nonnegative
+ subcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2+1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
+ ')
+L.$1.eval(TWOSUPN+$2):
+ ! remainder is negative
+ addcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2-1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
+ ')
+ ifelse( $1, 1, `9:')
+')
+ifelse( ANSWER, `quotient', `
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,SIGN ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ xor divisor,dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+',`
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,SIGN ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ mov dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+')
+
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc divisor,%g0,V ! movcc divisor,V
+ te 2 ! if divisor = 0
+ mov dividend,R
+ mov 0,Q
+ sethi %hi(1<<(WORDSIZE-TOPBITS-1)),T
+ cmp R,T
+ blu not_really_big
+ mov 0,ITER
+ !
+ ! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is N*ITER+SC, where
+ ! SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to Shift V into
+! the top decade: so don't even bother to compare to R.
+1:
+ cmp V,T
+ bgeu 3f
+ mov 1,SC
+ sll V,N,V
+ b 1b
+ inc ITER
+! Now compute SC
+2: addcc V,V,V
+ bcc not_too_big
+ add SC,1,SC
+ ! We're here if the divisor overflowed when Shifting.
+ ! This means that R has the high-order bit set.
+ ! Restore V and subtract from R.
+ sll T,TOPBITS,T ! high order bit
+ srl V,1,V ! rest of V
+ add V,T,V
+ b do_single_div
+ dec SC
+not_too_big:
+3: cmp V,R
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! V > R: went too far: back up 1 step
+! srl V,1,V
+! dec SC
+! do single-bit divide steps
+!
+! We have to be careful here. We know that R >= V, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if R >= 0. Because both R and V may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc SC
+ bl end_regular_divide
+ nop
+ sub R,V,R
+ mov 1,Q
+ b end_single_divloop
+ nop
+single_divloop:
+ sll Q,1,Q
+ bl 1f
+ srl V,1,V
+ ! R >= 0
+ sub R,V,R
+ b 2f
+ inc Q
+ 1: ! R < 0
+ add R,V,R
+ dec Q
+ 2:
+ end_single_divloop:
+ deccc SC
+ bge single_divloop
+ tst R
+ b end_regular_divide
+ nop
+
+not_really_big:
+1:
+ sll V,N,V
+ cmp V,R
+ bleu 1b
+ inccc ITER
+ be got_result
+ dec ITER
+do_regular_divide:
+ ! Do the main division iteration
+ tst R
+ ! Fall through into divide loop
+divloop:
+ sll Q,N,Q
+ DEVELOP_QUOTIENT_BITS( 1, 0 )
+end_regular_divide:
+ deccc ITER
+ bge divloop
+ tst R
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ifelse( ANSWER, `quotient',
+` dec Q
+',` add R,divisor,R
+')
+
+got_result:
+ tst SIGN
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ifelse( ANSWER, `quotient',
+` neg %o2,%o0 ! quotient <- -Q
+',` neg %o3,%o0 ! remainder <- -R
+')
+1:
+ retl ! leaf-routine return
+ifelse( ANSWER, `quotient',
+` mov %o2,%o0 ! quotient <- Q
+',` mov %o3,%o0 ! remainder <- R
+')
diff --git a/contrib/compiler-rt/lib/sparc64/divsi3.S b/contrib/compiler-rt/lib/sparc64/divsi3.S
new file mode 100644
index 0000000..52133f7
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/divsi3.S
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 4
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,%g3 ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ xor %o1,%o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b end_single_divloop
+ nop
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b end_regular_divide
+ nop
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ dec %o2
+got_result:
+ tst %g3
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ neg %o2,%o0 ! quotient <- -%o2
+1:
+ retl ! leaf-routine return
+ mov %o2,%o0 ! quotient <- %o2
diff --git a/contrib/compiler-rt/lib/sparc64/generate.sh b/contrib/compiler-rt/lib/sparc64/generate.sh
new file mode 100644
index 0000000..17c1106
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/generate.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S
+m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S
+echo '! This file intentionally left blank' > umodsi3.S
+echo '! This file intentionally left blank' > udivsi3.S
diff --git a/contrib/compiler-rt/lib/sparc64/modsi3.S b/contrib/compiler-rt/lib/sparc64/modsi3.S
new file mode 100644
index 0000000..98deb9d
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/modsi3.S
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 4
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,%g3 ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ mov %o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b end_single_divloop
+ nop
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b end_regular_divide
+ nop
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ add %o3,%o1,%o3
+got_result:
+ tst %g3
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ neg %o3,%o0 ! remainder <- -%o3
+1:
+ retl ! leaf-routine return
+ mov %o3,%o0 ! remainder <- %o3
diff --git a/contrib/compiler-rt/lib/sparc64/udivsi3.S b/contrib/compiler-rt/lib/sparc64/udivsi3.S
new file mode 100644
index 0000000..a418852
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/udivsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
diff --git a/contrib/compiler-rt/lib/sparc64/umodsi3.S b/contrib/compiler-rt/lib/sparc64/umodsi3.S
new file mode 100644
index 0000000..a418852
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/umodsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
OpenPOWER on IntegriCloud