diff options
Diffstat (limited to 'contrib/compiler-rt/lib/arm/udivmodsi4.S')
-rw-r--r-- | contrib/compiler-rt/lib/arm/udivmodsi4.S | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/contrib/compiler-rt/lib/arm/udivmodsi4.S b/contrib/compiler-rt/lib/arm/udivmodsi4.S new file mode 100644 index 0000000..d164a75 --- /dev/null +++ b/contrib/compiler-rt/lib/arm/udivmodsi4.S @@ -0,0 +1,80 @@ +/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===// + * + * This file implements the __udivmodsi4 (32-bit unsigned integer divide and + * modulus) function for the ARM architecture. A naive digit-by-digit + * computation is employed for simplicity. + * + *===----------------------------------------------------------------------===*/ + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + +#define a r0 +#define b r1 +#define i r3 +#define r r4 +#define q ip +#define one lr + +.syntax unified +.align 3 +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +// We use a simple digit by digit algorithm; before we get into the actual +// divide loop, we must calculate the left-shift amount necessary to align +// the MSB of the divisor with that of the dividend (If this shift is +// negative, then the result is zero, and we early out). We also conjure a +// bit mask of 1 to use in constructing the quotient, and initialize the +// quotient to zero. + ESTABLISH_FRAME + clz r4, a + tst b, b // detect divide-by-zero + clz r3, b + mov q, #0 + beq LOCAL_LABEL(return) // return 0 if b is zero. + mov one, #1 + subs i, r3, r4 + blt LOCAL_LABEL(return) // return 0 if MSB(a) < MSB(b) + +LOCAL_LABEL(mainLoop): +// This loop basically implements the following: +// +// do { +// if (a >= b << i) { +// a -= b << i; +// q |= 1 << i; +// if (a == 0) break; +// } +// } while (--i) +// +// Note that this does not perform the final iteration (i == 0); by doing it +// this way, we can merge the two branches which is a substantial win for +// such a tight loop on current ARM architectures. + subs r, a, b, lsl i + orrhs q, q,one, lsl i + movhs a, r + subsne i, i, #1 + bhi LOCAL_LABEL(mainLoop) + +// Do the final test subtraction and update of quotient (i == 0), as it is +// not performed in the main loop. + subs r, a, b + orrhs q, #1 + movhs a, r + +LOCAL_LABEL(return): +// Store the remainder, and move the quotient to r0, then return. + str a, [r2] + mov r0, q + CLEAR_FRAME_AND_RETURN |