summaryrefslogtreecommitdiffstats
path: root/lib/arm/umodsi3.S
blob: 328e7054b8572917ad97ce3919cc4f1467955fd0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
/*===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __umodsi3 (32-bit unsigned integer modulus)
 * function for the ARM architecture.  When __ARM_ARCH_7S__ is set, the
 * hardware udiv instruction is used; otherwise a naive digit-by-digit
 * computation is employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

//  Register aliases used by the software (non-udiv) path of __umodsi3.
//
//  NOTE: these are C preprocessor macros, so every bare token a, b, r or i on
//  an instruction line is rewritten.  In particular an unconditional branch
//  written with the plain b mnemonic would be expanded to r1, so only suffixed
//  branch mnemonics (beq, bhi, bx, ...) can be used while these are defined.

//  a: dividend on entry, running remainder in the loop, and the return value.
#define a r0
//  b: divisor.
#define b r1
//  r: scratch register holding the result of each trial subtraction.
#define r r2
//  i: left-shift amount currently applied to the divisor.
#define i r3

//  Select unified assembler syntax and align the function entry point.
//  NOTE(review): with GNU as targeting ARM the .align operand is a power of
//  two, so this should request 8-byte alignment; confirm for other assemblers.
.syntax unified
.align 3
//  __umodsi3: 32-bit unsigned remainder.
//
//  In:       r0 = dividend, r1 = divisor
//  Out:      r0 = dividend % divisor
//  Clobbers: r2 and the condition flags on both paths; additionally r3 on the
//            software path.
//
//  Divisor == 0: the udiv path returns 0, the software path returns the
//  dividend unchanged.
//  NOTE(review): the two paths disagree for a zero divisor; confirm whether
//  callers rely on either result.
DEFINE_COMPILERRT_FUNCTION(__umodsi3)
#if __ARM_ARCH_7S__
//  Hardware-divide path: remainder = r0 - (r0 / r1) * r1.
//  Set Z when the divisor is zero and bail out to the divzero handler.
	tst     r1, r1
	beq     LOCAL_LABEL(divzero)
//  r2 = r0 / r1 (unsigned quotient).
	udiv	r2, r0, r1
//  r0 = r0 - r2 * r1, i.e. the remainder.
	mls     r0, r2, r1, r0
	bx      lr
LOCAL_LABEL(divzero):
//  Zero divisor: return 0.
	mov     r0, #0
	bx      lr
#else
//  We use a simple digit by digit algorithm; before we get into the actual 
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend.
//
//  r2 = clz(a) and r3 = clz(b); clz does not touch the flags, so the Z flag
//  produced by tst is still valid when bxeq executes two instructions later.
    clz     r2,     a
    tst     b,      b       // detect b == 0
    clz     r3,     b
    bxeq    lr              // return a if b == 0
//  i = clz(b) - clz(a).  A negative result means the divisor has a higher MSB
//  than the dividend, so a < b and a is already the remainder.
    subs    i,      r3, r2
    bxlt    lr              // return a if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
//  This loop basically implements the following:
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          if (a == 0) break;
//      }
//  } while (--i)
//
//  Note that this does not perform the final iteration (i == 0); by doing it
//  this way, we can merge the two branches which is a substantial win for
//  such a tight loop on current ARM architectures.
//
//  Flag flow through the four instructions below:
//    subs   : r = a - (b << i); C set (hs) when a >= b << i, Z set when equal.
//    movhs  : commit the subtraction when it did not borrow; flags unchanged.
//    subsne : only when r != 0, decrement i and replace the flags.  When it is
//             skipped (r == 0, so a has just become 0) Z stays set.
//    bhi    : loop again only when C is set and Z is clear, i.e. the decrement
//             ran, did not borrow, and left i > 0.  Falls through when i has
//             reached 0, when i was already 0 on entry (the decrement
//             borrows), or when a became 0.
    subs    r,      a,  b, lsl i
    movhs   a,      r
    subsne  i,      i, #1
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of remainder (i == 0), as it is
//  not performed in the main loop.
//  If the loop was left because a became 0, or because i was already 0 on
//  entry, a < b already holds here, so this subtraction borrows and a is left
//  unchanged.
    subs    r,      a,  b
    movhs   a,      r
    bx      lr
#endif
OpenPOWER on IntegriCloud