/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM architecture.  A naive digit-by-digit
 * computation is employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

/* Frame setup/teardown for the software path.  r4 and lr are saved because
 * that path uses them as scratch registers (see the aliases below); r7 is
 * saved and then pointed into the frame.
 */
#define ESTABLISH_FRAME \
    push   {r4, r7, lr}    ;\
    add     r7,     sp, #4
#define CLEAR_FRAME_AND_RETURN \
    pop    {r4, r7, pc}

/* Register roles in the software path. */
#define a r0      /* in: dividend; afterwards the running remainder        */
#define b r1      /* in: divisor                                           */
#define i r3      /* current shift amount / bit position being tested      */
#define r r4      /* result of the trial subtraction                       */
#define q ip      /* quotient being accumulated                            */
#define one lr    /* constant 1, shifted into place to set quotient bits   */

/*
 * __udivmodsi4
 *
 * In:    r0 = dividend
 *        r1 = divisor
 *        r2 = address that receives the remainder (always written; the
 *             code performs no null check)
 * Out:   r0 = quotient
 *        [r2] = remainder
 *
 * Division by zero returns a quotient of 0 and stores the unmodified
 * dividend as the remainder, in both the hardware and the software path.
 *
 * NOTE(review): the matching C prototype is presumably
 *   unsigned __udivmodsi4(unsigned a, unsigned b, unsigned *rem)
 * -- confirm against the declaring header.
 */
.syntax unified
.align 3
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_7S__
    // Hardware divide is available: one udiv for the quotient, then
    // remainder = dividend - quotient * divisor via mls.
    tst     r1, r1
    beq     LOCAL_LABEL(divzero)
    mov     r3, r0                  // keep the dividend; r0 becomes the quotient
    udiv    r0, r3, r1
    mls     r1, r0, r1, r3          // r1 = r3 - r0 * r1
    str     r1, [r2]
    bx      lr
LOCAL_LABEL(divzero):
    // Divisor is zero.  Match the software path below: report the untouched
    // dividend as the remainder (previously nothing was stored here, leaving
    // the remainder slot unwritten) and return a quotient of zero.
    str     r0, [r2]
    mov     r0, #0
    bx      lr
#else
    // We use a simple digit by digit algorithm; before we get into the actual
    // divide loop, we must calculate the left-shift amount necessary to align
    // the MSB of the divisor with that of the dividend (If this shift is
    // negative, then the result is zero, and we early out). We also conjure a
    // bit mask of 1 to use in constructing the quotient, and initialize the
    // quotient to zero.
    ESTABLISH_FRAME
    clz     r4, a
    tst     b,  b                   // detect divide-by-zero
    clz     r3, b                   // clz and mov leave the tst flags intact
    mov     q,  #0
    beq     LOCAL_LABEL(return)     // return 0 if b is zero.
    mov     one, #1
    subs    i,  r3, r4              // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)     // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
    // This loop basically implements the following:
    //
    // do {
    //     if (a >= b << i) {
    //         a -= b << i;
    //         q |= 1 << i;
    //         if (a == 0) break;
    //     }
    // } while (--i)
    //
    // Note that this does not perform the final iteration (i == 0); by doing it
    // this way, we can merge the two branches which is a substantial win for
    // such a tight loop on current ARM architectures.
    //
    // Flag flow: the first subs sets C when a >= (b << i) and Z when the
    // difference is exactly zero.  orrhs and movhs do not touch the flags, so
    // subsne is skipped only on an exact hit (remainder now zero); in that case
    // Z is still set and bhi falls through.  Otherwise subsne decrements i and
    // bhi keeps looping only while the new i is still greater than zero.
    subs    r,  a,  b, lsl i
    orrhs   q,  q,one, lsl i
    movhs   a,  r
    subsne  i,  i, #1
    bhi     LOCAL_LABEL(mainLoop)

    // Do the final test subtraction and update of quotient (i == 0), as it is
    // not performed in the main loop.
    subs    r,  a,  b
    orrhs   q, #1
    movhs   a,  r

LOCAL_LABEL(return):
    // Store the remainder, and move the quotient to r0, then return.
    str     a, [r2]
    mov     r0, q
    CLEAR_FRAME_AND_RETURN
#endif