Home | History | Annotate | Download | only in i386
      1 // This file is dual licensed under the MIT and the University of Illinois Open
      2 // Source Licenses. See LICENSE.TXT for details.
      3 
      4 #include "../assembly.h"
      5 
      6 // di_int __divdi3(di_int a, di_int b);
      7 
      8 // result = a / b.
      9 // both inputs and the output are 64-bit signed integers.
     10 // This will do whatever the underlying hardware is set to do on division by zero.
     11 // No other exceptions are generated, as the divide cannot overflow.
     12 //
     13 // This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
     14 // on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
     15 // currently possible via simulation of integer divides on the x87 unit.
     16 //
     17 // Stephen Canon, December 2008
     18 
     19 #ifdef __i386__
     20 
     21 .text
     22 .balign 4
     23 DEFINE_COMPILERRT_FUNCTION(__divdi3)
     24 
     25 /* This is currently implemented by wrapping the unsigned divide up in an absolute
     26    value, then restoring the correct sign at the end of the computation.  This could
     27    certainly be improved upon. */
     28 
     29 	pushl		%esi
     30 	movl	 20(%esp),			%edx	// high word of b
     31 	movl	 16(%esp),			%eax	// low word of b
     32 	movl		%edx,			%ecx
     33 	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
     34 	xorl		%ecx,			%eax
     35 	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
     36 	subl		%ecx,			%eax
     37 	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
     38 	movl		%edx,		 20(%esp)
     39 	movl		%eax,		 16(%esp)	// store abs(b) back to stack
     40 	movl		%ecx,			%esi	// set aside sign of b
     41 
     42 	movl	 12(%esp),			%edx	// high word of b
     43 	movl	  8(%esp),			%eax	// low word of b
     44 	movl		%edx,			%ecx
     45 	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
     46 	xorl		%ecx,			%eax
     47 	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
     48 	subl		%ecx,			%eax
     49 	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
     50 	movl		%edx,		 12(%esp)
     51 	movl		%eax,		  8(%esp)	// store abs(a) back to stack
     52 	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
     53 
     54 	pushl		%ebx
     55 	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
     56 	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
     57 	jz			9f						// the code to handle that special case [9].
     58 
     59 	/* High word of b is known to be non-zero on this branch */
     60 
     61 	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
     62 
     63 	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
     64 	shrl		%eax					//
     65 	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
     66 	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
     67 	orl			%eax,			%ebx	//
     68 	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
     69 	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
     70 	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
     71 	jae			1f
     72 
     73 	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
     74 
     75 	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
     76 
     77 	pushl		%edi
     78 	notl		%ecx
     79 	shrl		%eax
     80 	shrl		%cl,			%eax	// q = qs >> (1 + i)
     81 	movl		%eax,			%edi
     82 	mull	 24(%esp)					// q*blo
     83 	movl	 16(%esp),			%ebx
     84 	movl	 20(%esp),			%ecx	// ECX:EBX = a
     85 	subl		%eax,			%ebx
     86 	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
     87 	movl	 28(%esp),			%eax
     88 	imull		%edi,			%eax	// q*bhi
     89 	subl		%eax,			%ecx	// ECX:EBX = a - q*b
     90 	sbbl		$0,				%edi	// decrement q if remainder is negative
     91 	xorl		%edx,			%edx
     92 	movl		%edi,			%eax
     93 
     94 	addl		%esi,			%eax	// Restore correct sign to result
     95 	adcl		%esi,			%edx
     96 	xorl		%esi,			%eax
     97 	xorl		%esi,			%edx
     98 	popl		%edi					// Restore callee-save registers
     99 	popl		%ebx
    100 	popl		%esi
    101 	retl								// Return
    102 
    103 
    104 1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
    105 
    106 	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
    107 	divl		%ebx					// overflow, and find q and r such that
    108 										//
    109 										//		ahi:alo = (1:q)*bhi + r
    110 										//
    111 										// Note that q is a number in (31-i).(1+i)
    112 										// fix point.
    113 
    114 	pushl		%edi
    115 	notl		%ecx
    116 	shrl		%eax
    117 	orl			$0x80000000,	%eax
    118 	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
    119 	movl		%eax,			%edi
    120 	mull	 24(%esp)					// q*blo
    121 	movl	 16(%esp),			%ebx
    122 	movl	 20(%esp),			%ecx	// ECX:EBX = a
    123 	subl		%eax,			%ebx
    124 	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
    125 	movl	 28(%esp),			%eax
    126 	imull		%edi,			%eax	// q*bhi
    127 	subl		%eax,			%ecx	// ECX:EBX = a - q*b
    128 	sbbl		$0,				%edi	// decrement q if remainder is negative
    129 	xorl		%edx,			%edx
    130 	movl		%edi,			%eax
    131 
    132 	addl		%esi,			%eax	// Restore correct sign to result
    133 	adcl		%esi,			%edx
    134 	xorl		%esi,			%eax
    135 	xorl		%esi,			%edx
    136 	popl		%edi					// Restore callee-save registers
    137 	popl		%ebx
    138 	popl		%esi
    139 	retl								// Return
    140 
    141 
    142 9:	/* High word of b is zero on this branch */
    143 
    144 	movl	 16(%esp),			%eax	// Find qhi and rhi such that
    145 	movl	 20(%esp),			%ecx	//
    146 	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0  rhi < b
    147 	divl		%ecx					//
    148 	movl		%eax,			%ebx	//
    149 	movl	 12(%esp),			%eax	// Find qlo such that
    150 	divl		%ecx					//
    151 	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0  rlo < b
    152 
    153 	addl		%esi,			%eax	// Restore correct sign to result
    154 	adcl		%esi,			%edx
    155 	xorl		%esi,			%eax
    156 	xorl		%esi,			%edx
    157 	popl		%ebx					// Restore callee-save registers
    158 	popl		%esi
    159 	retl								// Return
    160 END_COMPILERRT_FUNCTION(__divdi3)
    161 
    162 #endif // __i386__
    163 
    164 NO_EXEC_STACK_DIRECTIVE
    165 
    166