Home | History | Annotate | Download | only in arm
      1 /*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is dual licensed under the MIT and the University of Illinois Open
      6  * Source Licenses. See LICENSE.TXT for details.
      7  *
      8  *===----------------------------------------------------------------------===//
      9  *
     10  * This file implements the __udivsi3 (32-bit unsigned integer divide)
     11  * function for the ARM architecture.  A naive digit-by-digit computation is
     12  * employed for simplicity.
     13  *
     14  *===----------------------------------------------------------------------===*/
     15 
     16 #include "../assembly.h"
     17 
     18 #define ESTABLISH_FRAME \
     19     push   {r7, lr}    ;\
     20     mov     r7,     sp // save r7 and lr, then point r7 at the saved pair
     21 #define CLEAR_FRAME_AND_RETURN \
     22     pop    {r7, pc} // restore r7 and return by loading the saved lr into pc
     23 
     24 #define a r0 // dividend on entry, then the running remainder
     25 #define b r1 // divisor
     26 #define r r2 // trial-subtraction result (r2 first holds clz of the dividend)
     27 #define i r3 // shift count / quotient bit index (r3 first holds clz of the divisor)
     28 #define q ip // quotient under construction
     29 #define one lr // holds the constant 1 in the software path, so lr must be saved
     30 
     31 .syntax unified
     32 .align 3
     33 //  APCS and AAPCS agree on 32-bit args, so __aeabi_uidiv can alias this routine.
     34 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
     35 DEFINE_COMPILERRT_FUNCTION(__udivsi3) // in: r0 = dividend, r1 = divisor; out: r0 = quotient (0 if divisor is 0)
     36 #if __ARM_ARCH_7S__ // this branch divides with the udiv instruction
     37 	tst	r1,r1 // is the divisor zero?
     38 	beq	LOCAL_LABEL(divzero) // yes: skip udiv and return 0
     39 	udiv	r0, r0, r1 // r0 = r0 / r1
     40 	bx  	lr
     41 	LOCAL_LABEL(divzero):
     42 	mov	r0,#0 // division by zero yields 0
     43 	bx	lr
     44 #else
     45 //  We use a simple digit by digit algorithm; before we get into the actual
     46 //  divide loop, we must calculate the left-shift amount necessary to align
     47 //  the MSB of the divisor with that of the dividend (if this shift is
     48 //  negative, then the result is zero, and we return early). We also set up
     49 //  a bit mask of 1 to use in constructing the quotient, and initialize the
     50 //  quotient to zero.
     51     ESTABLISH_FRAME // saves r7 and lr; lr is reused below as the constant 1
     52     clz     r2,     a // r2 = number of leading zeros in the dividend
     53     tst     b,      b   // detect divide-by-zero (the clz and mov below leave the flags alone)
     54     clz     r3,     b // r3 = number of leading zeros in the divisor
     55     mov     q,      #0 // quotient starts at 0, which is also the early-out result
     56     beq     LOCAL_LABEL(return)    // return 0 if b is zero.
     57     mov     one,    #1
     58     subs    i,      r3, r2 // i = clz(b) - clz(a), the shift that aligns the two MSBs
     59     blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)
     60 
     61 LOCAL_LABEL(mainLoop):
     62 //  This loop basically implements the following (i taken as signed):
     63 //
     64 //  do {
     65 //      if (a >= b << i) {
     66 //          a -= b << i;
     67 //          q |= 1 << i;
     68 //          if (a == 0) break;
     69 //      }
     70 //  } while (--i > 0)
     71 //  (An initial i of 0 therefore runs the body once and then leaves the loop.)
     72 //  Note that this does not perform the final iteration (i == 0); by doing it
     73 //  this way, we can merge the two branches which is a substantial win for
     74 //  such a tight loop on current ARM architectures.
     75     subs    r,      a,  b, lsl i // r = a - (b << i); carry set iff a >= (b << i)
     76     orrhs   q,      q,one, lsl i // if a >= (b << i): q |= 1 << i
     77     movhs   a,      r // ... and keep the reduced remainder
     78     subsne  i,      i, #1 // unless r == 0 (Z still set from the subs above): --i, setting flags
     79     bhi     LOCAL_LABEL(mainLoop) // loop while the new i > 0; fall through if r == 0 or i ran out
     80 
     81 //  Do the final test subtraction and update of quotient (i == 0), as it is
     82 //  not performed in the main loop.
     83     subs    r,      a,  b // carry set iff a >= b
     84     orrhs   q,      #1 // if so, set bit 0 of the quotient
     85 
     86 LOCAL_LABEL(return):
     87 //  Move the quotient to r0 and return.
     88     mov     r0,     q
     89     CLEAR_FRAME_AND_RETURN // pops the saved r7 and returns through the saved lr
     90 #endif
     91