summaryrefslogtreecommitdiffstats
path: root/lib/mpi/longlong.h
diff options
context:
space:
mode:
authorFlorian Tobias Schandinat <FlorianSchandinat@gmx.de>2012-02-24 00:47:05 +0000
committerFlorian Tobias Schandinat <FlorianSchandinat@gmx.de>2012-02-24 00:47:05 +0000
commit8aaaaf3cbfb96a9f74c5775c71067a9290244ec7 (patch)
tree67349c6a69740150b67c4c39de49cde9b3648aa5 /lib/mpi/longlong.h
parent94f61a7666224ab307e387ff0d146055ae77e6ab (diff)
parente29206381a1436e0f47c0f5b9a23159a03c57715 (diff)
downloadop-kernel-dev-8aaaaf3cbfb96a9f74c5775c71067a9290244ec7.zip
op-kernel-dev-8aaaaf3cbfb96a9f74c5775c71067a9290244ec7.tar.gz
Merge branch 'fbdev-for-linus' into fbdev-next
Diffstat (limited to 'lib/mpi/longlong.h')
-rw-r--r--lib/mpi/longlong.h44
1 files changed, 33 insertions, 11 deletions
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index b87487b..29f9862 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -1200,18 +1200,40 @@ do { \
"r" ((USItype)(v)) \
: "%g1", "%g2" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
-#endif
-#ifndef udiv_qrnnd
-#ifndef LONGLONG_STANDALONE
+/* It's quite necessary to add this much assembler for the sparc.
+ The default udiv_qrnnd (in C) is more than 10 times slower! */
#define udiv_qrnnd(q, r, n1, n0, d) \
-do { USItype __r; \
- (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
- (r) = __r; \
-} while (0)
- extern USItype __udiv_qrnnd();
-#define UDIV_TIME 140
-#endif /* LONGLONG_STANDALONE */
-#endif /* udiv_qrnnd */
+ __asm__ ("! Inlined udiv_qrnnd\n\t" \
+ "mov 32,%%g1\n\t" \
+ "subcc %1,%2,%%g0\n\t" \
+ "1: bcs 5f\n\t" \
+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
+ "addx %1,%1,%1 ! so this can't give carry\n\t" \
+ "subcc %%g1,1,%%g1\n\t" \
+ "2: bne 1b\n\t" \
+ "subcc %1,%2,%%g0\n\t" \
+ "bcs 3f\n\t" \
+ "addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n\t" \
+ "b 3f\n\t" \
+ "sub %1,%2,%1 ! this kills msb of n\n\t" \
+ "4: sub %1,%2,%1\n\t" \
+ "5: addxcc %1,%1,%1\n\t" \
+ "bcc 2b\n\t" \
+ "subcc %%g1,1,%%g1\n\t" \
+ "! Got carry from n. Subtract next step to cancel this carry.\n\t" \
+ "bne 4b\n\t" \
+ "addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n\t" \
+ "sub %1,%2,%1\n\t" \
+ "3: xnor %0,0,%0\n\t" \
+ "! End of inline udiv_qrnnd\n" \
+ : "=&r" ((USItype)(q)), \
+ "=&r" ((USItype)(r)) \
+ : "r" ((USItype)(d)), \
+ "1" ((USItype)(n1)), \
+ "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
+#endif
#endif /* __sparc__ */
/***************************************
OpenPOWER on IntegriCloud