Home | History | Annotate | Download | only in i386
      1 // This file is dual licensed under the MIT and the University of Illinois Open
      2 // Source Licenses. See LICENSE.TXT for details.
      3 
      4 #include "../assembly.h"
      5 
      6 // float __floatundisf(du_int a);
      7 
      8 // Note that there is a hardware instruction, fildll, that does most of what
      9 // this function needs to do.  However, because of our ia32 ABI, it will take
     10 // a write-small read-large stall, so the software implementation here is
     11 // actually several cycles faster.
     12 
     13 // This is a branch-free implementation.  A branchy implementation might be
     14 // faster for the common case if you know something a priori about the input
     15 // distribution.
     16 
     17 /* branch-free x87 implementation - one cycle slower than without x87.
     18 
     19 #ifdef __i386__
     20 
     21 .const
     22 .balign 8
     23 
     24 		.quad	0x43f0000000000000
     25 twop64:	.quad	0x0000000000000000
     26 
     27 #define			TWOp64			twop64-0b(%ecx,%eax,8)
     28 
     29 .text
     30 .balign 4
     31 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
     32 	movl		8(%esp),		%eax
     33 	movd		8(%esp),		%xmm1
     34 	movd		4(%esp),		%xmm0
     35 	punpckldq	%xmm1,			%xmm0
     36 	calll		0f
     37 0:	popl		%ecx
     38 	sarl		$31,			%eax
     39 	movq		%xmm0,			4(%esp)
     40 	fildll		4(%esp)
     41 	faddl		TWOp64
     42 	fstps		4(%esp)
     43 	flds		4(%esp)
     44 	ret
     45 END_COMPILERRT_FUNCTION(__floatundisf)
     46 
     47 #endif // __i386__
     48 
     49 */
     50 
     51 /* branch-free, x87-free implementation - faster at the expense of code size */
     52 
     53 #ifdef __i386__
     54 
        // Constant pool for the SSE2 implementation of __floatundisf.
        // The .const directive is only emitted for non-ELF targets; on ELF the
        // data is assembled into whatever section is current at this point.
     55 #ifndef __ELF__
     56 .const
     57 #endif
     58 .balign 8
     59 twop52: .quad 0x4330000000000000	// bit pattern of the double 0x1.0p52
     60 		.quad 0x0000000000000fff	// STICKY entry at index -1: mask for the low 12 bits
     61 sticky: .quad 0x0000000000000000	// STICKY entry at index 0: empty mask
        // NOTE(review): the two .long values below and the label twelve are not
        // referenced by any code visible in this file; the shift count 12 is
        // loaded as an immediate instead.  Confirm before removing.
     62 		.long 0x00000012
     63 twelve:	.long 0x00000000
     64 
        // Both operands are addressed relative to local label 0, whose run-time
        // address the function keeps in %ecx.  STICKY is additionally indexed by
        // %eax * 8, so %eax must be 0 or -1 when it is used.
     65 #define			TWOp52			twop52-0b(%ecx)
     66 #define			STICKY			sticky-0b(%ecx,%eax,8)
     67 
        // float __floatundisf(du_int a)
        //
        // Converts the unsigned 64-bit integer on the stack (low dword at 4(%esp),
        // high dword at 8(%esp)) to single precision without branches and without
        // x87 arithmetic.  The input is turned into a double by OR-ing it into the
        // significand of 0x1.0p52 and subtracting 0x1.0p52 again.  Inputs of 2^52
        // or more ("big input" below) are first shifted right by 12, with the 12
        // discarded bits OR-ed back into the low bits as sticky bits; after the
        // conversion to float, 12 is added to the exponent field to undo the shift.
        //
        // Result:   loaded onto the x87 stack by the final flds.
        // Modifies: %eax, %ecx, %edx, %xmm0-%xmm3, flags, and the dword at 4(%esp).
     68 .text
     69 .balign 4
     70 DEFINE_COMPILERRT_FUNCTION(__floatundisf)
     71 	movl		8(%esp),		%eax	// high dword of input, used for the size test
     72 	movd		8(%esp),		%xmm1	// high dword of input
     73 	movd		4(%esp),		%xmm0	// low dword of input
     74 	punpckldq	%xmm1,			%xmm0	// xmm0 = 64-bit input (high:low)
     75 
     76 	calll		0f					// pushes the address of label 0
     77 0:	popl		%ecx				// ecx = address of label 0, base for TWOp52/STICKY
     78 	shrl		%eax					// high 31 bits of input as sint32
     79 	addl		$0x7ff80000,	%eax	// sign bit is set iff input >= 2^52
     80 	sarl		$31,			%eax	// (big input) ? -1 : 0
     81 	movsd		STICKY,			%xmm1	// (big input) ? 0xfff : 0
     82 	movl		$12,			%edx	// shift count used for big inputs
     83 	andl		%eax,			%edx	// (big input) ? 12 : 0
     84 	movd		%edx,			%xmm3	// shift count in an xmm register for psrlq
     85 	andpd		%xmm0,			%xmm1	// (big input) ? input & 0xfff : 0
     86 	movsd		TWOp52,			%xmm2	// 0x1.0p52
     87 	psrlq		%xmm3,			%xmm0	// (big input) ? input >> 12 : input
     88 	orpd		%xmm2,			%xmm1	// 0x1.0p52 + ((big input) ? input & 0xfff : 0)
     89 	orpd		%xmm1,			%xmm0	// 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input)
     90 	subsd		%xmm2,			%xmm0	// (double)((big input) ? (input >> 12 | input & 0xfff) : input)
     91 	cvtsd2ss	%xmm0,			%xmm0	// (float)((big input) ? (input >> 12 | input & 0xfff) : input)
     92 	pslld		$23,			%xmm3	// (big input) ? 12 << 23 : 0, i.e. 12 placed in the float exponent field
     93 	paddd		%xmm3,			%xmm0	// (float)input
     94 	movd		%xmm0,			4(%esp)	// spill the float bits into the argument slot
     95 	flds		4(%esp)				// load the result onto the x87 stack
     96 	ret
     97 END_COMPILERRT_FUNCTION(__floatundisf)
     98 
     99 #endif // __i386__
    100