summaryrefslogtreecommitdiffstats
path: root/contrib/compiler-rt
diff options
context:
space:
mode:
authored <ed@FreeBSD.org>2012-01-12 17:55:22 +0000
committered <ed@FreeBSD.org>2012-01-12 17:55:22 +0000
commitbc3ccc0a0014a72728da14afed61b31a74b2c95a (patch)
tree7b173471b286707b12d9a791dffff9bfb8ae5a55 /contrib/compiler-rt
parent1e5c8c891cb84cffeda3a526192d33c0161f00b4 (diff)
downloadFreeBSD-src-bc3ccc0a0014a72728da14afed61b31a74b2c95a.zip
FreeBSD-src-bc3ccc0a0014a72728da14afed61b31a74b2c95a.tar.gz
Add SPARC64 version of div/mod written in assembly.
This version is similar to the code shipped with libgcc. It is based on the code from the SPARC64 architecture manual, provided without any restrictions. Tested by: flo@
Diffstat (limited to 'contrib/compiler-rt')
-rw-r--r--contrib/compiler-rt/lib/sparc64/divmod.m4250
-rw-r--r--contrib/compiler-rt/lib/sparc64/divsi3.S333
-rw-r--r--contrib/compiler-rt/lib/sparc64/generate.sh6
-rw-r--r--contrib/compiler-rt/lib/sparc64/modsi3.S333
-rw-r--r--contrib/compiler-rt/lib/sparc64/udivsi3.S1
-rw-r--r--contrib/compiler-rt/lib/sparc64/umodsi3.S1
6 files changed, 924 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/sparc64/divmod.m4 b/contrib/compiler-rt/lib/sparc64/divmod.m4
new file mode 100644
index 0000000..6338199
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/divmod.m4
@@ -0,0 +1,250 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess: define(N, 4) define(TWOSUPN, 16)
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously: define(WORDSIZE, 32)
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ * define(TOPBITS, eval( WORDSIZE - N*((WORDSIZE-1)/N) ) )
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/N )
+ * Note that this is log_base_(2ˆN) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*N-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / N ) x ( 10 + 7N/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+
+define(dividend, `%o0')
+define(divisor,`%o1')
+define(Q, `%o2')
+define(R, `%o3')
+define(ITER, `%o4')
+define(V, `%o5')
+define(SIGN, `%g3')
+define(T, `%g1')
+define(SC,`%g2')
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=N
+ * $2 -- the current accumulation of quotient bits
+ * N -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * R -- current remainder
+ * Q -- current quotient
+ * V -- current comparand
+ * cc -- set on current value of R
+ * Dynamic output:
+ * R', Q', V', cc'
+ */
+
+#include "../assembly.h"
+
+.text
+ .align 4
+
+define(DEVELOP_QUOTIENT_BITS,
+` !depth $1, accumulated bits $2
+ bl L.$1.eval(TWOSUPN+$2)
+ srl V,1,V
+ ! remainder is nonnegative
+ subcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2+1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2+1)')
+ ')
+L.$1.eval(TWOSUPN+$2):
+ ! remainder is negative
+ addcc R,V,R
+ ifelse( $1, N,
+ ` b 9f
+ add Q, ($2*2-1), Q
+ ',` DEVELOP_QUOTIENT_BITS( incr($1), `eval(2*$2-1)')
+ ')
+ ifelse( $1, 1, `9:')
+')
+ifelse( ANSWER, `quotient', `
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,SIGN ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ xor divisor,dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+',`
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,SIGN ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc divisor,dividend,%g0 ! are either dividend or divisor negative
+ bge divide ! if not, skip this junk
+ mov dividend,SIGN ! record sign of result in sign of SIGN
+ tst divisor
+ bge 2f
+ tst dividend
+ ! divisor < 0
+ bge divide
+ neg divisor
+ 2:
+ ! dividend < 0
+ neg dividend
+ ! FALL THROUGH
+')
+
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc divisor,%g0,V ! movcc divisor,V
+ te 2 ! if divisor = 0
+ mov dividend,R
+ mov 0,Q
+ sethi %hi(1<<(WORDSIZE-TOPBITS-1)),T
+ cmp R,T
+ blu not_really_big
+ mov 0,ITER
+ !
+ ! Here, the dividend is >= 2ˆ(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is N*ITER+SC, where
+ ! SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to Shift V into
+! the top decade: so don't even bother to compare to R.
+1:
+ cmp V,T
+ bgeu 3f
+ mov 1,SC
+ sll V,N,V
+ b 1b
+ inc ITER
+! Now compute SC
+2: addcc V,V,V
+ bcc not_too_big
+ add SC,1,SC
+ ! We're here if the divisor overflowed when Shifting.
+ ! This means that R has the high-order bit set.
+ ! Restore V and subtract from R.
+ sll T,TOPBITS,T ! high order bit
+ srl V,1,V ! rest of V
+ add V,T,V
+ b do_single_div
+ dec SC
+not_too_big:
+3: cmp V,R
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! V > R: went too far: back up 1 step
+! srl V,1,V
+! dec SC
+! do single-bit divide steps
+!
+! We have to be careful here. We know that R >= V, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if R >= 0. Because both R and V may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc SC
+ bl end_regular_divide
+ nop
+ sub R,V,R
+ mov 1,Q
+ b end_single_divloop
+ nop
+single_divloop:
+ sll Q,1,Q
+ bl 1f
+ srl V,1,V
+ ! R >= 0
+ sub R,V,R
+ b 2f
+ inc Q
+ 1: ! R < 0
+ add R,V,R
+ dec Q
+ 2:
+ end_single_divloop:
+ deccc SC
+ bge single_divloop
+ tst R
+ b end_regular_divide
+ nop
+
+not_really_big:
+1:
+ sll V,N,V
+ cmp V,R
+ bleu 1b
+ inccc ITER
+ be got_result
+ dec ITER
+do_regular_divide:
+ ! Do the main division iteration
+ tst R
+ ! Fall through into divide loop
+divloop:
+ sll Q,N,Q
+ DEVELOP_QUOTIENT_BITS( 1, 0 )
+end_regular_divide:
+ deccc ITER
+ bge divloop
+ tst R
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ifelse( ANSWER, `quotient',
+` dec Q
+',` add R,divisor,R
+')
+
+got_result:
+ tst SIGN
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ifelse( ANSWER, `quotient',
+` neg %o2,%o0 ! quotient <- -Q
+',` neg %o3,%o0 ! remainder <- -R
+')
+1:
+ retl ! leaf-routine return
+ifelse( ANSWER, `quotient',
+` mov %o2,%o0 ! quotient <- Q
+',` mov %o3,%o0 ! remainder <- R
+')
diff --git a/contrib/compiler-rt/lib/sparc64/divsi3.S b/contrib/compiler-rt/lib/sparc64/divsi3.S
new file mode 100644
index 0000000..52133f7
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/divsi3.S
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 4
+DEFINE_COMPILERRT_FUNCTION(__udivsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,%g3 ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ xor %o1,%o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b end_single_divloop
+ nop
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b end_regular_divide
+ nop
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ dec %o2
+got_result:
+ tst %g3
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ neg %o2,%o0 ! quotient <- -%o2
+1:
+ retl ! leaf-routine return
+ mov %o2,%o0 ! quotient <- %o2
diff --git a/contrib/compiler-rt/lib/sparc64/generate.sh b/contrib/compiler-rt/lib/sparc64/generate.sh
new file mode 100644
index 0000000..17c1106
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/generate.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+m4 divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > modsi3.S
+m4 -DANSWER=quotient divmod.m4 | sed -e 's/[[:space:]]*$//' | grep -v '^$' > divsi3.S
+echo '! This file intentionally left blank' > umodsi3.S
+echo '! This file intentionally left blank' > udivsi3.S
diff --git a/contrib/compiler-rt/lib/sparc64/modsi3.S b/contrib/compiler-rt/lib/sparc64/modsi3.S
new file mode 100644
index 0000000..98deb9d
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/modsi3.S
@@ -0,0 +1,333 @@
+/*
+ * This m4 code has been taken from The SPARC Architecture Manual Version 8.
+ */
+/*
+ * Division/Remainder
+ *
+ * Input is:
+ * dividend -- the thing being divided
+ * divisor -- how many ways to divide it
+ * Important parameters:
+ * N -- how many bits per iteration we try to get
+ * as our current guess:
+ * WORDSIZE -- how many bits altogether we're talking about:
+ * obviously:
+ * A derived constant:
+ * TOPBITS -- how many bits are in the top "decade" of a number:
+ *
+ * Important variables are:
+ * Q -- the partial quotient under development -- initially 0
+ * R -- the remainder so far -- initially == the dividend
+ * ITER -- number of iterations of the main division loop which will
+ * be required. Equal to CEIL( lg2(quotient)/4 )
+ * Note that this is log_base_(2ˆ4) of the quotient.
+ * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
+ * Cost:
+ * current estimate for non-large dividend is
+ * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
+ * a large dividend is one greater than 2ˆ(31-4 ) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ * This uses the m4 and cpp macro preprocessors.
+ */
+/*
+ * This is the recursive definition of how we develop quotient digits.
+ * It takes three important parameters:
+ * $1 -- the current depth, 1<=$1<=4
+ * $2 -- the current accumulation of quotient bits
+ * 4 -- max depth
+ * We add a new bit to $2 and either recurse or insert the bits in the quotient.
+ * Dynamic input:
+ * %o3 -- current remainder
+ * %o2 -- current quotient
+ * %o5 -- current comparand
+ * cc -- set on current value of %o3
+ * Dynamic output:
+ * %o3', %o2', %o5', cc'
+ */
+#include "../assembly.h"
+.text
+ .align 4
+DEFINE_COMPILERRT_FUNCTION(__umodsi3)
+ save %sp,-64,%sp ! do this for debugging
+ b divide
+ mov 0,%g3 ! result always nonnegative
+DEFINE_COMPILERRT_FUNCTION(__modsi3)
+ save %sp,-64,%sp ! do this for debugging
+ orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative
+ bge divide ! if not, skip this junk
+ mov %o0,%g3 ! record sign of result in sign of %g3
+ tst %o1
+ bge 2f
+ tst %o0
+ ! %o1 < 0
+ bge divide
+ neg %o1
+ 2:
+ ! %o0 < 0
+ neg %o0
+ ! FALL THROUGH
+divide:
+ ! Compute size of quotient, scale comparand.
+ orcc %o1,%g0,%o5 ! movcc %o1,%o5
+ te 2 ! if %o1 = 0
+ mov %o0,%o3
+ mov 0,%o2
+ sethi %hi(1<<(32-4 -1)),%g1
+ cmp %o3,%g1
+ blu not_really_big
+ mov 0,%o4
+ !
+ ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
+ ! as our usual 4-at-a-shot divide step will cause overflow and havoc.
+ ! The total number of bits in the result here is 4*%o4+%g2, where
+ ! %g2 <= 4.
+ ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
+! the top decade: so don't even bother to compare to %o3.
+1:
+ cmp %o5,%g1
+ bgeu 3f
+ mov 1,%g2
+ sll %o5,4,%o5
+ b 1b
+ inc %o4
+! Now compute %g2
+2: addcc %o5,%o5,%o5
+ bcc not_too_big
+ add %g2,1,%g2
+ ! We're here if the %o1 overflowed when Shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1,4 ,%g1 ! high order bit
+ srl %o5,1,%o5 ! rest of %o5
+ add %o5,%g1,%o5
+ b do_single_div
+ dec %g2
+not_too_big:
+3: cmp %o5,%o3
+ blu 2b
+ nop
+ be do_single_div
+ nop
+! %o5 > %o3: went too far: back up 1 step
+! srl %o5,1,%o5
+! dec %g2
+! do single-bit divide steps
+!
+! We have to be careful here. We know that %o3 >= %o5, so we can do the
+! first divide step without thinking. BUT, the others are conditional,
+! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+! order bit set in the first step, just falling into the regular
+! division loop will mess up the first time around.
+! So we unroll slightly...
+do_single_div:
+ deccc %g2
+ bl end_regular_divide
+ nop
+ sub %o3,%o5,%o3
+ mov 1,%o2
+ b end_single_divloop
+ nop
+single_divloop:
+ sll %o2,1,%o2
+ bl 1f
+ srl %o5,1,%o5
+ ! %o3 >= 0
+ sub %o3,%o5,%o3
+ b 2f
+ inc %o2
+ 1: ! %o3 < 0
+ add %o3,%o5,%o3
+ dec %o2
+ 2:
+ end_single_divloop:
+ deccc %g2
+ bge single_divloop
+ tst %o3
+ b end_regular_divide
+ nop
+not_really_big:
+1:
+ sll %o5,4,%o5
+ cmp %o5,%o3
+ bleu 1b
+ inccc %o4
+ be got_result
+ dec %o4
+do_regular_divide:
+ ! Do the main division iteration
+ tst %o3
+ ! Fall through into divide loop
+divloop:
+ sll %o2,4,%o2
+ !depth 1, accumulated bits 0
+ bl L.1.16
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 2, accumulated bits 1
+ bl L.2.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits 3
+ bl L.3.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 7
+ bl L.4.23
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+L.4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+L.3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 5
+ bl L.4.21
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+L.4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+L.2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits 1
+ bl L.3.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits 3
+ bl L.4.19
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+L.4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+L.3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits 1
+ bl L.4.17
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+L.4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+L.1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 2, accumulated bits -1
+ bl L.2.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 3, accumulated bits -1
+ bl L.3.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -1
+ bl L.4.15
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+L.4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+L.3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -3
+ bl L.4.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+L.4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+L.2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 3, accumulated bits -3
+ bl L.3.13
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ !depth 4, accumulated bits -5
+ bl L.4.11
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+L.4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+L.3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ !depth 4, accumulated bits -7
+ bl L.4.9
+ srl %o5,1,%o5
+ ! remainder is nonnegative
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+L.4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+ 9:
+end_regular_divide:
+ deccc %o4
+ bge divloop
+ tst %o3
+ bge got_result
+ nop
+ ! non-restoring fixup here
+ add %o3,%o1,%o3
+got_result:
+ tst %g3
+ bge 1f
+ restore
+ ! answer < 0
+ retl ! leaf-routine return
+ neg %o3,%o0 ! remainder <- -%o3
+1:
+ retl ! leaf-routine return
+ mov %o3,%o0 ! remainder <- %o3
diff --git a/contrib/compiler-rt/lib/sparc64/udivsi3.S b/contrib/compiler-rt/lib/sparc64/udivsi3.S
new file mode 100644
index 0000000..a418852
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/udivsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
diff --git a/contrib/compiler-rt/lib/sparc64/umodsi3.S b/contrib/compiler-rt/lib/sparc64/umodsi3.S
new file mode 100644
index 0000000..a418852
--- /dev/null
+++ b/contrib/compiler-rt/lib/sparc64/umodsi3.S
@@ -0,0 +1 @@
+! This file intentionally left blank
OpenPOWER on IntegriCloud