Home | History | Annotate | Download | only in arm
      1 /*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is dual licensed under the MIT and the University of Illinois Open
      6  * Source Licenses. See LICENSE.TXT for details.
      7  *
      8  *===----------------------------------------------------------------------===//
      9  *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM architecture.  When __ARM_ARCH_7S__ is set the
 * hardware udiv instruction is used; otherwise a naive digit-by-digit
 * computation is employed for simplicity.
     13  *
     14  *===----------------------------------------------------------------------===*/
     15 
     16 #include "../assembly.h"
     17 
     18 #define ESTABLISH_FRAME    \
     19     push   {r4, r7, lr}   ;\
     20     add     r7,     sp, #4
     21 #define CLEAR_FRAME_AND_RETURN \
     22     pop    {r4, r7, pc}
     23 
     24 #define a r0
     25 #define b r1
     26 #define i r3
     27 #define r r4
     28 #define q ip
     29 #define one lr
     30 
     31 .syntax unified
     32 .align 3
     33 DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
     34 #if __ARM_ARCH_7S__
     35 	tst     r1, r1
     36 	beq     LOCAL_LABEL(divzero)
     37 	mov 	r3, r0
     38 	udiv	r0, r3, r1
     39 	mls 	r1, r0, r1, r3
     40 	str 	r1, [r2]
     41 	bx  	lr
     42 LOCAL_LABEL(divzero):
     43 	mov     r0, #0
     44 	bx      lr
     45 #else
     46 //  We use a simple digit by digit algorithm; before we get into the actual
     47 //  divide loop, we must calculate the left-shift amount necessary to align
     48 //  the MSB of the divisor with that of the dividend (If this shift is
     49 //  negative, then the result is zero, and we early out). We also conjure a
     50 //  bit mask of 1 to use in constructing the quotient, and initialize the
     51 //  quotient to zero.
     52     ESTABLISH_FRAME
     53     clz     r4,     a
     54     tst     b,      b   // detect divide-by-zero
     55     clz     r3,     b
     56     mov     q,      #0
     57     beq     LOCAL_LABEL(return)    // return 0 if b is zero.
     58     mov     one,    #1
     59     subs    i,      r3, r4
     60     blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)
     61 
     62 LOCAL_LABEL(mainLoop):
     63 //  This loop basically implements the following:
     64 //
     65 //  do {
     66 //      if (a >= b << i) {
     67 //          a -= b << i;
     68 //          q |= 1 << i;
     69 //          if (a == 0) break;
     70 //      }
     71 //  } while (--i)
     72 //
     73 //  Note that this does not perform the final iteration (i == 0); by doing it
     74 //  this way, we can merge the two branches which is a substantial win for
     75 //  such a tight loop on current ARM architectures.
     76     subs    r,      a,  b, lsl i
     77     orrhs   q,      q,one, lsl i
     78     movhs   a,      r
     79     subsne  i,      i, #1
     80     bhi     LOCAL_LABEL(mainLoop)
     81 
     82 //  Do the final test subtraction and update of quotient (i == 0), as it is
     83 //  not performed in the main loop.
     84     subs    r,      a,  b
     85     orrhs   q,      #1
     86     movhs   a,      r
     87 
     88 LOCAL_LABEL(return):
     89 //  Store the remainder, and move the quotient to r0, then return.
     90     str     a,     [r2]
     91     mov     r0,     q
     92     CLEAR_FRAME_AND_RETURN
     93 #endif
     94