1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512DQVL_32_WIN 3 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512DQVL_32_LIN 4 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512DQVL_64_WIN 5 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512DQVL_64_LIN 6 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512DQ_32_WIN 7 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512DQ_32_LIN 8 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512DQ_64_WIN 9 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512DQ_64_LIN 10 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_WIN,AVX512F_32_WIN 11 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X32,AVX512_32,AVX512_32_LIN,AVX512F_32_LIN 12 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_WIN,AVX512F_64_WIN 13 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,X64,AVX512_64,AVX512_64_LIN,AVX512F_64_LIN 14 ; RUN: llc < 
%s -mtriple=i386-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X32,SSE3_32,SSE3_32_WIN 15 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X32,SSE3_32,SSE3_32_LIN 16 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE3_64,SSE3_64_WIN 17 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE3_64,SSE3_64_LIN 18 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE2_32,SSE2_32_WIN 19 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X32,SSE2_32,SSE2_32_LIN 20 ; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE2_64,SSE2_64_WIN 21 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE2_64,SSE2_64_LIN 22 ; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse | FileCheck %s --check-prefixes=CHECK,X32,X87,X87_WIN 23 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse | FileCheck %s --check-prefixes=CHECK,X32,X87,X87_LIN 24 ; Prefix scheme used above - X32 or X64 follows the triple's pointer width (i386 vs x86_64), the _32/_64 infix repeats that width per feature set, and the _WIN/_LIN suffix follows the OS in the triple. 25 ; Check that scalar FP conversions to signed and unsigned int64 are using 26 ; reasonable sequences, across platforms and target switches. 27 ; 28 ; The signed case is straightforward, and the tests here basically 29 ; ensure successful compilation (f80 with avx512 was broken at one point). 30 ; 31 ; For the unsigned case there are many possible sequences, so to avoid 32 ; a fragile test we just check for the presence of a few key instructions. 33 ; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double. 34 ; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd), 35 ; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When 36 ; both a subtract and fnstcw are needed, they can occur in either order. 
37 ; 38 ; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp), 39 ; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only). 40 ; f_to_u64 returns its float argument converted to i64 with fptoui. The assertion lines inside the body are the autogenerated per-configuration expectations - regenerate them with utils/update_llc_test_checks.py instead of editing by hand. In the expected sequences that lack a direct unsigned conversion, a threshold constant is loaded (spelled __real@5f000000 on 32-bit Windows, i.e. the float bit pattern for 2^63), the input is compared with it and has it subtracted, and bit 63 of the truncated result is conditionally flipped. 41 define i64 @f_to_u64(float %a) nounwind { 42 ; AVX512DQVL_32_WIN-LABEL: f_to_u64: 43 ; AVX512DQVL_32_WIN: # %bb.0: 44 ; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 45 ; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0 46 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax 47 ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 48 ; AVX512DQVL_32_WIN-NEXT: vzeroupper 49 ; AVX512DQVL_32_WIN-NEXT: retl 50 ; 51 ; AVX512DQVL_32_LIN-LABEL: f_to_u64: 52 ; AVX512DQVL_32_LIN: # %bb.0: 53 ; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 54 ; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0 55 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax 56 ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 57 ; AVX512DQVL_32_LIN-NEXT: vzeroupper 58 ; AVX512DQVL_32_LIN-NEXT: retl 59 ; 60 ; AVX512_64-LABEL: f_to_u64: 61 ; AVX512_64: # %bb.0: 62 ; AVX512_64-NEXT: vcvttss2usi %xmm0, %rax 63 ; AVX512_64-NEXT: retq 64 ; 65 ; AVX512DQ_32_WIN-LABEL: f_to_u64: 66 ; AVX512DQ_32_WIN: # %bb.0: 67 ; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 68 ; AVX512DQ_32_WIN-NEXT: vcvttps2uqq %ymm0, %zmm0 69 ; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax 70 ; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 71 ; AVX512DQ_32_WIN-NEXT: vzeroupper 72 ; AVX512DQ_32_WIN-NEXT: retl 73 ; 74 ; AVX512DQ_32_LIN-LABEL: f_to_u64: 75 ; AVX512DQ_32_LIN: # %bb.0: 76 ; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 77 ; AVX512DQ_32_LIN-NEXT: vcvttps2uqq %ymm0, %zmm0 78 ; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax 79 ; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 80 ; AVX512DQ_32_LIN-NEXT: vzeroupper 81 ; AVX512DQ_32_LIN-NEXT: retl 82 ; 83 ; AVX512F_32_WIN-LABEL: f_to_u64: 84 ; AVX512F_32_WIN: # %bb.0: 85 ; AVX512F_32_WIN-NEXT: pushl %ebp 86 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp 87 ; AVX512F_32_WIN-NEXT: andl 
$-8, %esp 88 ; AVX512F_32_WIN-NEXT: subl $16, %esp 89 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 90 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 91 ; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 92 ; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 93 ; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} 94 ; AVX512F_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 95 ; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 96 ; AVX512F_32_WIN-NEXT: fisttpll (%esp) 97 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx 98 ; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1 99 ; AVX512F_32_WIN-NEXT: setbe %dl 100 ; AVX512F_32_WIN-NEXT: shll $31, %edx 101 ; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 102 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax 103 ; AVX512F_32_WIN-NEXT: movl %ebp, %esp 104 ; AVX512F_32_WIN-NEXT: popl %ebp 105 ; AVX512F_32_WIN-NEXT: retl 106 ; 107 ; AVX512F_32_LIN-LABEL: f_to_u64: 108 ; AVX512F_32_LIN: # %bb.0: 109 ; AVX512F_32_LIN-NEXT: subl $20, %esp 110 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 111 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 112 ; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 113 ; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 114 ; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} 115 ; AVX512F_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 116 ; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 117 ; AVX512F_32_LIN-NEXT: fisttpll (%esp) 118 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx 119 ; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1 120 ; AVX512F_32_LIN-NEXT: setbe %dl 121 ; AVX512F_32_LIN-NEXT: shll $31, %edx 122 ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 123 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax 124 ; AVX512F_32_LIN-NEXT: addl $20, %esp 125 ; AVX512F_32_LIN-NEXT: retl 126 ; 127 ; SSE3_32_WIN-LABEL: f_to_u64: 128 ; SSE3_32_WIN: # %bb.0: 129 ; SSE3_32_WIN-NEXT: pushl %ebp 130 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 131 ; SSE3_32_WIN-NEXT: andl $-8, %esp 132 ; 
SSE3_32_WIN-NEXT: subl $16, %esp 133 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 134 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 135 ; SSE3_32_WIN-NEXT: movaps %xmm0, %xmm2 136 ; SSE3_32_WIN-NEXT: cmpltss %xmm1, %xmm2 137 ; SSE3_32_WIN-NEXT: movaps %xmm2, %xmm3 138 ; SSE3_32_WIN-NEXT: andps %xmm0, %xmm2 139 ; SSE3_32_WIN-NEXT: xorl %edx, %edx 140 ; SSE3_32_WIN-NEXT: ucomiss %xmm0, %xmm1 141 ; SSE3_32_WIN-NEXT: subss %xmm1, %xmm0 142 ; SSE3_32_WIN-NEXT: andnps %xmm0, %xmm3 143 ; SSE3_32_WIN-NEXT: orps %xmm3, %xmm2 144 ; SSE3_32_WIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp) 145 ; SSE3_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 146 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 147 ; SSE3_32_WIN-NEXT: setbe %dl 148 ; SSE3_32_WIN-NEXT: shll $31, %edx 149 ; SSE3_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 150 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 151 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 152 ; SSE3_32_WIN-NEXT: popl %ebp 153 ; SSE3_32_WIN-NEXT: retl 154 ; 155 ; SSE3_32_LIN-LABEL: f_to_u64: 156 ; SSE3_32_LIN: # %bb.0: 157 ; SSE3_32_LIN-NEXT: subl $20, %esp 158 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 159 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 160 ; SSE3_32_LIN-NEXT: movaps %xmm0, %xmm2 161 ; SSE3_32_LIN-NEXT: cmpltss %xmm1, %xmm2 162 ; SSE3_32_LIN-NEXT: movaps %xmm2, %xmm3 163 ; SSE3_32_LIN-NEXT: andps %xmm0, %xmm2 164 ; SSE3_32_LIN-NEXT: xorl %edx, %edx 165 ; SSE3_32_LIN-NEXT: ucomiss %xmm0, %xmm1 166 ; SSE3_32_LIN-NEXT: subss %xmm1, %xmm0 167 ; SSE3_32_LIN-NEXT: andnps %xmm0, %xmm3 168 ; SSE3_32_LIN-NEXT: orps %xmm3, %xmm2 169 ; SSE3_32_LIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp) 170 ; SSE3_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 171 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 172 ; SSE3_32_LIN-NEXT: setbe %dl 173 ; SSE3_32_LIN-NEXT: shll $31, %edx 174 ; SSE3_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 175 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 176 ; SSE3_32_LIN-NEXT: addl $20, %esp 177 ; SSE3_32_LIN-NEXT: retl 178 ; 179 ; 
SSE3_64-LABEL: f_to_u64: 180 ; SSE3_64: # %bb.0: 181 ; SSE3_64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 182 ; SSE3_64-NEXT: movaps %xmm0, %xmm2 183 ; SSE3_64-NEXT: subss %xmm1, %xmm2 184 ; SSE3_64-NEXT: cvttss2si %xmm2, %rax 185 ; SSE3_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 186 ; SSE3_64-NEXT: xorq %rax, %rcx 187 ; SSE3_64-NEXT: cvttss2si %xmm0, %rax 188 ; SSE3_64-NEXT: ucomiss %xmm1, %xmm0 189 ; SSE3_64-NEXT: cmovaeq %rcx, %rax 190 ; SSE3_64-NEXT: retq 191 ; 192 ; SSE2_32_WIN-LABEL: f_to_u64: 193 ; SSE2_32_WIN: # %bb.0: 194 ; SSE2_32_WIN-NEXT: pushl %ebp 195 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 196 ; SSE2_32_WIN-NEXT: andl $-8, %esp 197 ; SSE2_32_WIN-NEXT: subl $24, %esp 198 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 199 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 200 ; SSE2_32_WIN-NEXT: movaps %xmm0, %xmm2 201 ; SSE2_32_WIN-NEXT: cmpltss %xmm1, %xmm2 202 ; SSE2_32_WIN-NEXT: movaps %xmm2, %xmm3 203 ; SSE2_32_WIN-NEXT: andps %xmm0, %xmm2 204 ; SSE2_32_WIN-NEXT: xorl %edx, %edx 205 ; SSE2_32_WIN-NEXT: ucomiss %xmm0, %xmm1 206 ; SSE2_32_WIN-NEXT: subss %xmm1, %xmm0 207 ; SSE2_32_WIN-NEXT: andnps %xmm0, %xmm3 208 ; SSE2_32_WIN-NEXT: orps %xmm3, %xmm2 209 ; SSE2_32_WIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp) 210 ; SSE2_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 211 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 212 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 213 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 214 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 215 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 216 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 217 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 218 ; SSE2_32_WIN-NEXT: setbe %dl 219 ; SSE2_32_WIN-NEXT: shll $31, %edx 220 ; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 221 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 222 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 223 ; SSE2_32_WIN-NEXT: popl %ebp 224 ; SSE2_32_WIN-NEXT: retl 225 ; 
226 ; SSE2_32_LIN-LABEL: f_to_u64: 227 ; SSE2_32_LIN: # %bb.0: 228 ; SSE2_32_LIN-NEXT: subl $28, %esp 229 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 230 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 231 ; SSE2_32_LIN-NEXT: movaps %xmm0, %xmm2 232 ; SSE2_32_LIN-NEXT: cmpltss %xmm1, %xmm2 233 ; SSE2_32_LIN-NEXT: movaps %xmm2, %xmm3 234 ; SSE2_32_LIN-NEXT: andps %xmm0, %xmm2 235 ; SSE2_32_LIN-NEXT: xorl %edx, %edx 236 ; SSE2_32_LIN-NEXT: ucomiss %xmm0, %xmm1 237 ; SSE2_32_LIN-NEXT: subss %xmm1, %xmm0 238 ; SSE2_32_LIN-NEXT: andnps %xmm0, %xmm3 239 ; SSE2_32_LIN-NEXT: orps %xmm3, %xmm2 240 ; SSE2_32_LIN-NEXT: movss %xmm2, {{[0-9]+}}(%esp) 241 ; SSE2_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 242 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 243 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 244 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 245 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 246 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 247 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 248 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 249 ; SSE2_32_LIN-NEXT: setbe %dl 250 ; SSE2_32_LIN-NEXT: shll $31, %edx 251 ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 252 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 253 ; SSE2_32_LIN-NEXT: addl $28, %esp 254 ; SSE2_32_LIN-NEXT: retl 255 ; 256 ; SSE2_64-LABEL: f_to_u64: 257 ; SSE2_64: # %bb.0: 258 ; SSE2_64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 259 ; SSE2_64-NEXT: movaps %xmm0, %xmm2 260 ; SSE2_64-NEXT: subss %xmm1, %xmm2 261 ; SSE2_64-NEXT: cvttss2si %xmm2, %rax 262 ; SSE2_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 263 ; SSE2_64-NEXT: xorq %rax, %rcx 264 ; SSE2_64-NEXT: cvttss2si %xmm0, %rax 265 ; SSE2_64-NEXT: ucomiss %xmm1, %xmm0 266 ; SSE2_64-NEXT: cmovaeq %rcx, %rax 267 ; SSE2_64-NEXT: retq 268 ; 269 ; X87_WIN-LABEL: f_to_u64: 270 ; X87_WIN: # %bb.0: 271 ; X87_WIN-NEXT: pushl %ebp 272 ; X87_WIN-NEXT: movl %esp, %ebp 273 ; X87_WIN-NEXT: 
andl $-8, %esp 274 ; X87_WIN-NEXT: subl $16, %esp 275 ; X87_WIN-NEXT: flds 8(%ebp) 276 ; X87_WIN-NEXT: flds __real@5f000000 277 ; X87_WIN-NEXT: fld %st(1) 278 ; X87_WIN-NEXT: fsub %st(1) 279 ; X87_WIN-NEXT: fxch %st(1) 280 ; X87_WIN-NEXT: fucomp %st(2) 281 ; X87_WIN-NEXT: fnstsw %ax 282 ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax 283 ; X87_WIN-NEXT: sahf 284 ; X87_WIN-NEXT: ja LBB0_2 285 ; X87_WIN-NEXT: # %bb.1: 286 ; X87_WIN-NEXT: fstp %st(1) 287 ; X87_WIN-NEXT: fldz 288 ; X87_WIN-NEXT: LBB0_2: 289 ; X87_WIN-NEXT: fstp %st(0) 290 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 291 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 292 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 293 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 294 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 295 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 296 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 297 ; X87_WIN-NEXT: setbe %al 298 ; X87_WIN-NEXT: movzbl %al, %edx 299 ; X87_WIN-NEXT: shll $31, %edx 300 ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 301 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 302 ; X87_WIN-NEXT: movl %ebp, %esp 303 ; X87_WIN-NEXT: popl %ebp 304 ; X87_WIN-NEXT: retl 305 ; 306 ; X87_LIN-LABEL: f_to_u64: 307 ; X87_LIN: # %bb.0: 308 ; X87_LIN-NEXT: subl $20, %esp 309 ; X87_LIN-NEXT: flds {{[0-9]+}}(%esp) 310 ; X87_LIN-NEXT: flds {{\.LCPI.*}} 311 ; X87_LIN-NEXT: fld %st(1) 312 ; X87_LIN-NEXT: fsub %st(1) 313 ; X87_LIN-NEXT: fxch %st(1) 314 ; X87_LIN-NEXT: fucomp %st(2) 315 ; X87_LIN-NEXT: fnstsw %ax 316 ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax 317 ; X87_LIN-NEXT: sahf 318 ; X87_LIN-NEXT: ja .LBB0_2 319 ; X87_LIN-NEXT: # %bb.1: 320 ; X87_LIN-NEXT: fstp %st(1) 321 ; X87_LIN-NEXT: fldz 322 ; X87_LIN-NEXT: .LBB0_2: 323 ; X87_LIN-NEXT: fstp %st(0) 324 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 325 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 326 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 327 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 328 ; X87_LIN-NEXT: movw %ax, 
{{[0-9]+}}(%esp) 329 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 330 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 331 ; X87_LIN-NEXT: setbe %al 332 ; X87_LIN-NEXT: movzbl %al, %edx 333 ; X87_LIN-NEXT: shll $31, %edx 334 ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 335 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 336 ; X87_LIN-NEXT: addl $20, %esp 337 ; X87_LIN-NEXT: retl 338 %r = fptoui float %a to i64 339 ret i64 %r 340 } 341 ; f_to_s64 returns its float argument converted to i64 with fptosi. No range adjustment appears in the expected code - each configuration uses one truncating conversion (vcvttps2qq, cvttss2si or vcvttss2si, fisttpll, or fistpll bracketed by fnstcw/fldcw with the control word temporarily set to 0xC7F). The assertion lines are autogenerated - regenerate them with utils/update_llc_test_checks.py instead of editing by hand. 342 define i64 @f_to_s64(float %a) nounwind { 343 ; AVX512DQVL_32_WIN-LABEL: f_to_s64: 344 ; AVX512DQVL_32_WIN: # %bb.0: 345 ; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 346 ; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0 347 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax 348 ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 349 ; AVX512DQVL_32_WIN-NEXT: vzeroupper 350 ; AVX512DQVL_32_WIN-NEXT: retl 351 ; 352 ; AVX512DQVL_32_LIN-LABEL: f_to_s64: 353 ; AVX512DQVL_32_LIN: # %bb.0: 354 ; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 355 ; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0 356 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax 357 ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 358 ; AVX512DQVL_32_LIN-NEXT: vzeroupper 359 ; AVX512DQVL_32_LIN-NEXT: retl 360 ; 361 ; AVX512_64-LABEL: f_to_s64: 362 ; AVX512_64: # %bb.0: 363 ; AVX512_64-NEXT: vcvttss2si %xmm0, %rax 364 ; AVX512_64-NEXT: retq 365 ; 366 ; AVX512DQ_32_WIN-LABEL: f_to_s64: 367 ; AVX512DQ_32_WIN: # %bb.0: 368 ; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 369 ; AVX512DQ_32_WIN-NEXT: vcvttps2qq %ymm0, %zmm0 370 ; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax 371 ; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 372 ; AVX512DQ_32_WIN-NEXT: vzeroupper 373 ; AVX512DQ_32_WIN-NEXT: retl 374 ; 375 ; AVX512DQ_32_LIN-LABEL: f_to_s64: 376 ; AVX512DQ_32_LIN: # %bb.0: 377 ; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 378 ; AVX512DQ_32_LIN-NEXT: vcvttps2qq %ymm0, %zmm0 379 ; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax 380 ; 
AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 381 ; AVX512DQ_32_LIN-NEXT: vzeroupper 382 ; AVX512DQ_32_LIN-NEXT: retl 383 ; 384 ; AVX512F_32_WIN-LABEL: f_to_s64: 385 ; AVX512F_32_WIN: # %bb.0: 386 ; AVX512F_32_WIN-NEXT: pushl %ebp 387 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp 388 ; AVX512F_32_WIN-NEXT: andl $-8, %esp 389 ; AVX512F_32_WIN-NEXT: subl $16, %esp 390 ; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 391 ; AVX512F_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 392 ; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 393 ; AVX512F_32_WIN-NEXT: fisttpll (%esp) 394 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax 395 ; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 396 ; AVX512F_32_WIN-NEXT: movl %ebp, %esp 397 ; AVX512F_32_WIN-NEXT: popl %ebp 398 ; AVX512F_32_WIN-NEXT: retl 399 ; 400 ; AVX512F_32_LIN-LABEL: f_to_s64: 401 ; AVX512F_32_LIN: # %bb.0: 402 ; AVX512F_32_LIN-NEXT: subl $20, %esp 403 ; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 404 ; AVX512F_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 405 ; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 406 ; AVX512F_32_LIN-NEXT: fisttpll (%esp) 407 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax 408 ; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 409 ; AVX512F_32_LIN-NEXT: addl $20, %esp 410 ; AVX512F_32_LIN-NEXT: retl 411 ; 412 ; SSE3_32_WIN-LABEL: f_to_s64: 413 ; SSE3_32_WIN: # %bb.0: 414 ; SSE3_32_WIN-NEXT: pushl %ebp 415 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 416 ; SSE3_32_WIN-NEXT: andl $-8, %esp 417 ; SSE3_32_WIN-NEXT: subl $16, %esp 418 ; SSE3_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 419 ; SSE3_32_WIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 420 ; SSE3_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 421 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 422 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 423 ; SSE3_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 424 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 425 ; SSE3_32_WIN-NEXT: popl %ebp 426 ; SSE3_32_WIN-NEXT: retl 427 ; 428 ; SSE3_32_LIN-LABEL: f_to_s64: 429 ; SSE3_32_LIN: # 
%bb.0: 430 ; SSE3_32_LIN-NEXT: subl $20, %esp 431 ; SSE3_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 432 ; SSE3_32_LIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 433 ; SSE3_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 434 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 435 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 436 ; SSE3_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 437 ; SSE3_32_LIN-NEXT: addl $20, %esp 438 ; SSE3_32_LIN-NEXT: retl 439 ; 440 ; SSE3_64-LABEL: f_to_s64: 441 ; SSE3_64: # %bb.0: 442 ; SSE3_64-NEXT: cvttss2si %xmm0, %rax 443 ; SSE3_64-NEXT: retq 444 ; 445 ; SSE2_32_WIN-LABEL: f_to_s64: 446 ; SSE2_32_WIN: # %bb.0: 447 ; SSE2_32_WIN-NEXT: pushl %ebp 448 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 449 ; SSE2_32_WIN-NEXT: andl $-8, %esp 450 ; SSE2_32_WIN-NEXT: subl $24, %esp 451 ; SSE2_32_WIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 452 ; SSE2_32_WIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 453 ; SSE2_32_WIN-NEXT: flds {{[0-9]+}}(%esp) 454 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 455 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 456 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 457 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 458 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 459 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 460 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 461 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 462 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 463 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 464 ; SSE2_32_WIN-NEXT: popl %ebp 465 ; SSE2_32_WIN-NEXT: retl 466 ; 467 ; SSE2_32_LIN-LABEL: f_to_s64: 468 ; SSE2_32_LIN: # %bb.0: 469 ; SSE2_32_LIN-NEXT: subl $28, %esp 470 ; SSE2_32_LIN-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 471 ; SSE2_32_LIN-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 472 ; SSE2_32_LIN-NEXT: flds {{[0-9]+}}(%esp) 473 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 474 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 475 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 476 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 477 
; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 478 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 479 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 480 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 481 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 482 ; SSE2_32_LIN-NEXT: addl $28, %esp 483 ; SSE2_32_LIN-NEXT: retl 484 ; 485 ; SSE2_64-LABEL: f_to_s64: 486 ; SSE2_64: # %bb.0: 487 ; SSE2_64-NEXT: cvttss2si %xmm0, %rax 488 ; SSE2_64-NEXT: retq 489 ; 490 ; X87_WIN-LABEL: f_to_s64: 491 ; X87_WIN: # %bb.0: 492 ; X87_WIN-NEXT: pushl %ebp 493 ; X87_WIN-NEXT: movl %esp, %ebp 494 ; X87_WIN-NEXT: andl $-8, %esp 495 ; X87_WIN-NEXT: subl $16, %esp 496 ; X87_WIN-NEXT: flds 8(%ebp) 497 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 498 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 499 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 500 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 501 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 502 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 503 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 504 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 505 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 506 ; X87_WIN-NEXT: movl %ebp, %esp 507 ; X87_WIN-NEXT: popl %ebp 508 ; X87_WIN-NEXT: retl 509 ; 510 ; X87_LIN-LABEL: f_to_s64: 511 ; X87_LIN: # %bb.0: 512 ; X87_LIN-NEXT: subl $20, %esp 513 ; X87_LIN-NEXT: flds {{[0-9]+}}(%esp) 514 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 515 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 516 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 517 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 518 ; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 519 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 520 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 521 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 522 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 523 ; X87_LIN-NEXT: addl $20, %esp 524 ; X87_LIN-NEXT: retl 525 %r = fptosi float %a to i64 526 ret i64 %r 527 } 528 529 define i64 @d_to_u64(double %a) nounwind { 530 ; AVX512DQVL_32_WIN-LABEL: d_to_u64: 531 ; AVX512DQVL_32_WIN: # 
%bb.0: 532 ; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 533 ; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 534 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax 535 ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 536 ; AVX512DQVL_32_WIN-NEXT: vzeroupper 537 ; AVX512DQVL_32_WIN-NEXT: retl 538 ; 539 ; AVX512DQVL_32_LIN-LABEL: d_to_u64: 540 ; AVX512DQVL_32_LIN: # %bb.0: 541 ; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 542 ; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 543 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax 544 ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 545 ; AVX512DQVL_32_LIN-NEXT: vzeroupper 546 ; AVX512DQVL_32_LIN-NEXT: retl 547 ; 548 ; AVX512_64-LABEL: d_to_u64: 549 ; AVX512_64: # %bb.0: 550 ; AVX512_64-NEXT: vcvttsd2usi %xmm0, %rax 551 ; AVX512_64-NEXT: retq 552 ; 553 ; AVX512DQ_32_WIN-LABEL: d_to_u64: 554 ; AVX512DQ_32_WIN: # %bb.0: 555 ; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 556 ; AVX512DQ_32_WIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 557 ; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax 558 ; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 559 ; AVX512DQ_32_WIN-NEXT: vzeroupper 560 ; AVX512DQ_32_WIN-NEXT: retl 561 ; 562 ; AVX512DQ_32_LIN-LABEL: d_to_u64: 563 ; AVX512DQ_32_LIN: # %bb.0: 564 ; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 565 ; AVX512DQ_32_LIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 566 ; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax 567 ; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 568 ; AVX512DQ_32_LIN-NEXT: vzeroupper 569 ; AVX512DQ_32_LIN-NEXT: retl 570 ; 571 ; AVX512F_32_WIN-LABEL: d_to_u64: 572 ; AVX512F_32_WIN: # %bb.0: 573 ; AVX512F_32_WIN-NEXT: pushl %ebp 574 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp 575 ; AVX512F_32_WIN-NEXT: andl $-8, %esp 576 ; AVX512F_32_WIN-NEXT: subl $16, %esp 577 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 578 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 579 ; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 580 ; AVX512F_32_WIN-NEXT: vsubsd 
%xmm1, %xmm0, %xmm2 581 ; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} 582 ; AVX512F_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 583 ; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 584 ; AVX512F_32_WIN-NEXT: fisttpll (%esp) 585 ; AVX512F_32_WIN-NEXT: xorl %edx, %edx 586 ; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1 587 ; AVX512F_32_WIN-NEXT: setbe %dl 588 ; AVX512F_32_WIN-NEXT: shll $31, %edx 589 ; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 590 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax 591 ; AVX512F_32_WIN-NEXT: movl %ebp, %esp 592 ; AVX512F_32_WIN-NEXT: popl %ebp 593 ; AVX512F_32_WIN-NEXT: retl 594 ; 595 ; AVX512F_32_LIN-LABEL: d_to_u64: 596 ; AVX512F_32_LIN: # %bb.0: 597 ; AVX512F_32_LIN-NEXT: subl $20, %esp 598 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 599 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 600 ; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 601 ; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 602 ; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} 603 ; AVX512F_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 604 ; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 605 ; AVX512F_32_LIN-NEXT: fisttpll (%esp) 606 ; AVX512F_32_LIN-NEXT: xorl %edx, %edx 607 ; AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1 608 ; AVX512F_32_LIN-NEXT: setbe %dl 609 ; AVX512F_32_LIN-NEXT: shll $31, %edx 610 ; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 611 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax 612 ; AVX512F_32_LIN-NEXT: addl $20, %esp 613 ; AVX512F_32_LIN-NEXT: retl 614 ; 615 ; SSE3_32_WIN-LABEL: d_to_u64: 616 ; SSE3_32_WIN: # %bb.0: 617 ; SSE3_32_WIN-NEXT: pushl %ebp 618 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 619 ; SSE3_32_WIN-NEXT: andl $-8, %esp 620 ; SSE3_32_WIN-NEXT: subl $16, %esp 621 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 622 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 623 ; SSE3_32_WIN-NEXT: movapd %xmm0, %xmm2 624 ; SSE3_32_WIN-NEXT: cmpltsd %xmm1, %xmm2 625 ; SSE3_32_WIN-NEXT: movapd %xmm2, %xmm3 626 ; 
SSE3_32_WIN-NEXT: andpd %xmm0, %xmm2 627 ; SSE3_32_WIN-NEXT: xorl %edx, %edx 628 ; SSE3_32_WIN-NEXT: ucomisd %xmm0, %xmm1 629 ; SSE3_32_WIN-NEXT: subsd %xmm1, %xmm0 630 ; SSE3_32_WIN-NEXT: andnpd %xmm0, %xmm3 631 ; SSE3_32_WIN-NEXT: orpd %xmm3, %xmm2 632 ; SSE3_32_WIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) 633 ; SSE3_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 634 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 635 ; SSE3_32_WIN-NEXT: setbe %dl 636 ; SSE3_32_WIN-NEXT: shll $31, %edx 637 ; SSE3_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 638 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 639 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 640 ; SSE3_32_WIN-NEXT: popl %ebp 641 ; SSE3_32_WIN-NEXT: retl 642 ; 643 ; SSE3_32_LIN-LABEL: d_to_u64: 644 ; SSE3_32_LIN: # %bb.0: 645 ; SSE3_32_LIN-NEXT: subl $20, %esp 646 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 647 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 648 ; SSE3_32_LIN-NEXT: movapd %xmm0, %xmm2 649 ; SSE3_32_LIN-NEXT: cmpltsd %xmm1, %xmm2 650 ; SSE3_32_LIN-NEXT: movapd %xmm2, %xmm3 651 ; SSE3_32_LIN-NEXT: andpd %xmm0, %xmm2 652 ; SSE3_32_LIN-NEXT: xorl %edx, %edx 653 ; SSE3_32_LIN-NEXT: ucomisd %xmm0, %xmm1 654 ; SSE3_32_LIN-NEXT: subsd %xmm1, %xmm0 655 ; SSE3_32_LIN-NEXT: andnpd %xmm0, %xmm3 656 ; SSE3_32_LIN-NEXT: orpd %xmm3, %xmm2 657 ; SSE3_32_LIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) 658 ; SSE3_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 659 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 660 ; SSE3_32_LIN-NEXT: setbe %dl 661 ; SSE3_32_LIN-NEXT: shll $31, %edx 662 ; SSE3_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 663 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 664 ; SSE3_32_LIN-NEXT: addl $20, %esp 665 ; SSE3_32_LIN-NEXT: retl 666 ; 667 ; SSE3_64-LABEL: d_to_u64: 668 ; SSE3_64: # %bb.0: 669 ; SSE3_64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 670 ; SSE3_64-NEXT: movapd %xmm0, %xmm2 671 ; SSE3_64-NEXT: subsd %xmm1, %xmm2 672 ; SSE3_64-NEXT: cvttsd2si %xmm2, %rax 673 ; SSE3_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 674 ; SSE3_64-NEXT: xorq %rax, 
%rcx 675 ; SSE3_64-NEXT: cvttsd2si %xmm0, %rax 676 ; SSE3_64-NEXT: ucomisd %xmm1, %xmm0 677 ; SSE3_64-NEXT: cmovaeq %rcx, %rax 678 ; SSE3_64-NEXT: retq 679 ; 680 ; SSE2_32_WIN-LABEL: d_to_u64: 681 ; SSE2_32_WIN: # %bb.0: 682 ; SSE2_32_WIN-NEXT: pushl %ebp 683 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 684 ; SSE2_32_WIN-NEXT: andl $-8, %esp 685 ; SSE2_32_WIN-NEXT: subl $24, %esp 686 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 687 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 688 ; SSE2_32_WIN-NEXT: movapd %xmm0, %xmm2 689 ; SSE2_32_WIN-NEXT: cmpltsd %xmm1, %xmm2 690 ; SSE2_32_WIN-NEXT: movapd %xmm2, %xmm3 691 ; SSE2_32_WIN-NEXT: andpd %xmm0, %xmm2 692 ; SSE2_32_WIN-NEXT: xorl %edx, %edx 693 ; SSE2_32_WIN-NEXT: ucomisd %xmm0, %xmm1 694 ; SSE2_32_WIN-NEXT: subsd %xmm1, %xmm0 695 ; SSE2_32_WIN-NEXT: andnpd %xmm0, %xmm3 696 ; SSE2_32_WIN-NEXT: orpd %xmm3, %xmm2 697 ; SSE2_32_WIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) 698 ; SSE2_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 699 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 700 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 701 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 702 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 703 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 704 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 705 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 706 ; SSE2_32_WIN-NEXT: setbe %dl 707 ; SSE2_32_WIN-NEXT: shll $31, %edx 708 ; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 709 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 710 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 711 ; SSE2_32_WIN-NEXT: popl %ebp 712 ; SSE2_32_WIN-NEXT: retl 713 ; 714 ; SSE2_32_LIN-LABEL: d_to_u64: 715 ; SSE2_32_LIN: # %bb.0: 716 ; SSE2_32_LIN-NEXT: subl $28, %esp 717 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 718 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 719 ; SSE2_32_LIN-NEXT: movapd %xmm0, %xmm2 720 ; SSE2_32_LIN-NEXT: cmpltsd %xmm1, %xmm2 721 ; SSE2_32_LIN-NEXT: movapd %xmm2, %xmm3 722 ; 
SSE2_32_LIN-NEXT: andpd %xmm0, %xmm2 723 ; SSE2_32_LIN-NEXT: xorl %edx, %edx 724 ; SSE2_32_LIN-NEXT: ucomisd %xmm0, %xmm1 725 ; SSE2_32_LIN-NEXT: subsd %xmm1, %xmm0 726 ; SSE2_32_LIN-NEXT: andnpd %xmm0, %xmm3 727 ; SSE2_32_LIN-NEXT: orpd %xmm3, %xmm2 728 ; SSE2_32_LIN-NEXT: movsd %xmm2, {{[0-9]+}}(%esp) 729 ; SSE2_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 730 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 731 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 732 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 733 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 734 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 735 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 736 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 737 ; SSE2_32_LIN-NEXT: setbe %dl 738 ; SSE2_32_LIN-NEXT: shll $31, %edx 739 ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 740 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 741 ; SSE2_32_LIN-NEXT: addl $28, %esp 742 ; SSE2_32_LIN-NEXT: retl 743 ; 744 ; SSE2_64-LABEL: d_to_u64: 745 ; SSE2_64: # %bb.0: 746 ; SSE2_64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 747 ; SSE2_64-NEXT: movapd %xmm0, %xmm2 748 ; SSE2_64-NEXT: subsd %xmm1, %xmm2 749 ; SSE2_64-NEXT: cvttsd2si %xmm2, %rax 750 ; SSE2_64-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 751 ; SSE2_64-NEXT: xorq %rax, %rcx 752 ; SSE2_64-NEXT: cvttsd2si %xmm0, %rax 753 ; SSE2_64-NEXT: ucomisd %xmm1, %xmm0 754 ; SSE2_64-NEXT: cmovaeq %rcx, %rax 755 ; SSE2_64-NEXT: retq 756 ; 757 ; X87_WIN-LABEL: d_to_u64: 758 ; X87_WIN: # %bb.0: 759 ; X87_WIN-NEXT: pushl %ebp 760 ; X87_WIN-NEXT: movl %esp, %ebp 761 ; X87_WIN-NEXT: andl $-8, %esp 762 ; X87_WIN-NEXT: subl $16, %esp 763 ; X87_WIN-NEXT: fldl 8(%ebp) 764 ; X87_WIN-NEXT: flds __real@5f000000 765 ; X87_WIN-NEXT: fld %st(1) 766 ; X87_WIN-NEXT: fsub %st(1) 767 ; X87_WIN-NEXT: fxch %st(1) 768 ; X87_WIN-NEXT: fucomp %st(2) 769 ; X87_WIN-NEXT: fnstsw %ax 770 ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax 771 ; X87_WIN-NEXT: sahf 772 ; X87_WIN-NEXT: ja 
LBB2_2 773 ; X87_WIN-NEXT: # %bb.1: 774 ; X87_WIN-NEXT: fstp %st(1) 775 ; X87_WIN-NEXT: fldz 776 ; X87_WIN-NEXT: LBB2_2: 777 ; X87_WIN-NEXT: fstp %st(0) 778 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 779 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 780 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 781 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 782 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 783 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 784 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 785 ; X87_WIN-NEXT: setbe %al 786 ; X87_WIN-NEXT: movzbl %al, %edx 787 ; X87_WIN-NEXT: shll $31, %edx 788 ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 789 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 790 ; X87_WIN-NEXT: movl %ebp, %esp 791 ; X87_WIN-NEXT: popl %ebp 792 ; X87_WIN-NEXT: retl 793 ; 794 ; X87_LIN-LABEL: d_to_u64: 795 ; X87_LIN: # %bb.0: 796 ; X87_LIN-NEXT: subl $20, %esp 797 ; X87_LIN-NEXT: fldl {{[0-9]+}}(%esp) 798 ; X87_LIN-NEXT: flds {{\.LCPI.*}} 799 ; X87_LIN-NEXT: fld %st(1) 800 ; X87_LIN-NEXT: fsub %st(1) 801 ; X87_LIN-NEXT: fxch %st(1) 802 ; X87_LIN-NEXT: fucomp %st(2) 803 ; X87_LIN-NEXT: fnstsw %ax 804 ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax 805 ; X87_LIN-NEXT: sahf 806 ; X87_LIN-NEXT: ja .LBB2_2 807 ; X87_LIN-NEXT: # %bb.1: 808 ; X87_LIN-NEXT: fstp %st(1) 809 ; X87_LIN-NEXT: fldz 810 ; X87_LIN-NEXT: .LBB2_2: 811 ; X87_LIN-NEXT: fstp %st(0) 812 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 813 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 814 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 815 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 816 ; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 817 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 818 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 819 ; X87_LIN-NEXT: setbe %al 820 ; X87_LIN-NEXT: movzbl %al, %edx 821 ; X87_LIN-NEXT: shll $31, %edx 822 ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 823 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 824 ; X87_LIN-NEXT: addl $20, %esp 825 ; X87_LIN-NEXT: retl 826 %r = fptoui double %a to i64 
827 ret i64 %r 828 } 829
; d_to_s64 converts a double to a signed 64-bit integer with a single fptosi.
; The autogenerated expectations that follow show three lowering families:
;   - the 64-bit prefixes expect one cvttsd2si (vcvttsd2si for AVX512_64);
;   - the 32-bit AVX512DQ and AVX512DQVL prefixes expect vcvttpd2qq followed by
;     a vmovd/vpextrd pair that splits the result into eax and edx;
;   - the remaining 32-bit prefixes go through the x87 stack: AVX512F_32 and
;     SSE3_32 expect fisttpll, while SSE2_32 and X87 expect fistpll bracketed by
;     an fnstcw/fldcw control-word switch (temporary value 0xC7F).
; Do not hand-edit the expectations; regenerate them with
; utils/update_llc_test_checks.py.
830 define i64 @d_to_s64(double %a) nounwind { 831 ; AVX512DQVL_32_WIN-LABEL: d_to_s64: 832 ; AVX512DQVL_32_WIN: # %bb.0: 833 ; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 834 ; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0 835 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax 836 ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 837 ; AVX512DQVL_32_WIN-NEXT: vzeroupper 838 ; AVX512DQVL_32_WIN-NEXT: retl 839 ; 840 ; AVX512DQVL_32_LIN-LABEL: d_to_s64: 841 ; AVX512DQVL_32_LIN: # %bb.0: 842 ; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 843 ; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0 844 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax 845 ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 846 ; AVX512DQVL_32_LIN-NEXT: vzeroupper 847 ; AVX512DQVL_32_LIN-NEXT: retl 848 ; 849 ; AVX512_64-LABEL: d_to_s64: 850 ; AVX512_64: # %bb.0: 851 ; AVX512_64-NEXT: vcvttsd2si %xmm0, %rax 852 ; AVX512_64-NEXT: retq 853 ; 854 ; AVX512DQ_32_WIN-LABEL: d_to_s64: 855 ; AVX512DQ_32_WIN: # %bb.0: 856 ; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 857 ; AVX512DQ_32_WIN-NEXT: vcvttpd2qq %zmm0, %zmm0 858 ; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax 859 ; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx 860 ; AVX512DQ_32_WIN-NEXT: vzeroupper 861 ; AVX512DQ_32_WIN-NEXT: retl 862 ; 863 ; AVX512DQ_32_LIN-LABEL: d_to_s64: 864 ; AVX512DQ_32_LIN: # %bb.0: 865 ; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 866 ; AVX512DQ_32_LIN-NEXT: vcvttpd2qq %zmm0, %zmm0 867 ; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax 868 ; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx 869 ; AVX512DQ_32_LIN-NEXT: vzeroupper 870 ; AVX512DQ_32_LIN-NEXT: retl 871 ; 872 ; AVX512F_32_WIN-LABEL: d_to_s64: 873 ; AVX512F_32_WIN: # %bb.0: 874 ; AVX512F_32_WIN-NEXT: pushl %ebp 875 ; AVX512F_32_WIN-NEXT: movl %esp, %ebp 876 ; AVX512F_32_WIN-NEXT: andl $-8, %esp 877 ; AVX512F_32_WIN-NEXT: subl $16, %esp 878 ; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 879 ; 
AVX512F_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 880 ; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 881 ; AVX512F_32_WIN-NEXT: fisttpll (%esp) 882 ; AVX512F_32_WIN-NEXT: movl (%esp), %eax 883 ; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 884 ; AVX512F_32_WIN-NEXT: movl %ebp, %esp 885 ; AVX512F_32_WIN-NEXT: popl %ebp 886 ; AVX512F_32_WIN-NEXT: retl 887 ; 888 ; AVX512F_32_LIN-LABEL: d_to_s64: 889 ; AVX512F_32_LIN: # %bb.0: 890 ; AVX512F_32_LIN-NEXT: subl $20, %esp 891 ; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 892 ; AVX512F_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 893 ; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 894 ; AVX512F_32_LIN-NEXT: fisttpll (%esp) 895 ; AVX512F_32_LIN-NEXT: movl (%esp), %eax 896 ; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 897 ; AVX512F_32_LIN-NEXT: addl $20, %esp 898 ; AVX512F_32_LIN-NEXT: retl 899 ; 900 ; SSE3_32_WIN-LABEL: d_to_s64: 901 ; SSE3_32_WIN: # %bb.0: 902 ; SSE3_32_WIN-NEXT: pushl %ebp 903 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 904 ; SSE3_32_WIN-NEXT: andl $-8, %esp 905 ; SSE3_32_WIN-NEXT: subl $16, %esp 906 ; SSE3_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 907 ; SSE3_32_WIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 908 ; SSE3_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 909 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 910 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 911 ; SSE3_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 912 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 913 ; SSE3_32_WIN-NEXT: popl %ebp 914 ; SSE3_32_WIN-NEXT: retl 915 ; 916 ; SSE3_32_LIN-LABEL: d_to_s64: 917 ; SSE3_32_LIN: # %bb.0: 918 ; SSE3_32_LIN-NEXT: subl $20, %esp 919 ; SSE3_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 920 ; SSE3_32_LIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 921 ; SSE3_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 922 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 923 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 924 ; SSE3_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 925 ; SSE3_32_LIN-NEXT: addl $20, %esp 926 ; SSE3_32_LIN-NEXT: retl 927 ; 928 ; SSE3_64-LABEL: d_to_s64: 929 ; SSE3_64: # 
%bb.0: 930 ; SSE3_64-NEXT: cvttsd2si %xmm0, %rax 931 ; SSE3_64-NEXT: retq 932 ; 933 ; SSE2_32_WIN-LABEL: d_to_s64: 934 ; SSE2_32_WIN: # %bb.0: 935 ; SSE2_32_WIN-NEXT: pushl %ebp 936 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 937 ; SSE2_32_WIN-NEXT: andl $-8, %esp 938 ; SSE2_32_WIN-NEXT: subl $24, %esp 939 ; SSE2_32_WIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 940 ; SSE2_32_WIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 941 ; SSE2_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) 942 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 943 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 944 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 945 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 946 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 947 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 948 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 949 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 950 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 951 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 952 ; SSE2_32_WIN-NEXT: popl %ebp 953 ; SSE2_32_WIN-NEXT: retl 954 ; 955 ; SSE2_32_LIN-LABEL: d_to_s64: 956 ; SSE2_32_LIN: # %bb.0: 957 ; SSE2_32_LIN-NEXT: subl $28, %esp 958 ; SSE2_32_LIN-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 959 ; SSE2_32_LIN-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 960 ; SSE2_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) 961 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 962 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 963 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 964 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 965 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 966 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 967 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 968 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 969 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 970 ; SSE2_32_LIN-NEXT: addl $28, %esp 971 ; SSE2_32_LIN-NEXT: retl 972 ; 973 ; SSE2_64-LABEL: d_to_s64: 974 ; SSE2_64: # %bb.0: 975 ; SSE2_64-NEXT: cvttsd2si %xmm0, %rax 976 ; SSE2_64-NEXT: retq 977 ; 978 ; X87_WIN-LABEL: d_to_s64: 979 ; 
X87_WIN: # %bb.0: 980 ; X87_WIN-NEXT: pushl %ebp 981 ; X87_WIN-NEXT: movl %esp, %ebp 982 ; X87_WIN-NEXT: andl $-8, %esp 983 ; X87_WIN-NEXT: subl $16, %esp 984 ; X87_WIN-NEXT: fldl 8(%ebp) 985 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 986 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 987 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 988 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 989 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 990 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 991 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 992 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 993 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 994 ; X87_WIN-NEXT: movl %ebp, %esp 995 ; X87_WIN-NEXT: popl %ebp 996 ; X87_WIN-NEXT: retl 997 ; 998 ; X87_LIN-LABEL: d_to_s64: 999 ; X87_LIN: # %bb.0: 1000 ; X87_LIN-NEXT: subl $20, %esp 1001 ; X87_LIN-NEXT: fldl {{[0-9]+}}(%esp) 1002 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1003 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1004 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1005 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1006 ; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1007 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 1008 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1009 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1010 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1011 ; X87_LIN-NEXT: addl $20, %esp 1012 ; X87_LIN-NEXT: retl 1013 %r = fptosi double %a to i64 1014 ret i64 %r 1015 } 1016 1017 define i64 @x_to_u64(x86_fp80 %a) nounwind { 1018 ; AVX512_32_WIN-LABEL: x_to_u64: 1019 ; AVX512_32_WIN: # %bb.0: 1020 ; AVX512_32_WIN-NEXT: pushl %ebp 1021 ; AVX512_32_WIN-NEXT: movl %esp, %ebp 1022 ; AVX512_32_WIN-NEXT: andl $-8, %esp 1023 ; AVX512_32_WIN-NEXT: subl $8, %esp 1024 ; AVX512_32_WIN-NEXT: fldt 8(%ebp) 1025 ; AVX512_32_WIN-NEXT: flds __real@5f000000 1026 ; AVX512_32_WIN-NEXT: fld %st(1) 1027 ; AVX512_32_WIN-NEXT: fsub %st(1) 1028 ; AVX512_32_WIN-NEXT: xorl %edx, %edx 1029 ; AVX512_32_WIN-NEXT: fxch %st(1) 1030 ; AVX512_32_WIN-NEXT: fucompi %st(2) 1031 ; 
AVX512_32_WIN-NEXT: fcmovnbe %st(1), %st(0) 1032 ; AVX512_32_WIN-NEXT: fstp %st(1) 1033 ; AVX512_32_WIN-NEXT: fisttpll (%esp) 1034 ; AVX512_32_WIN-NEXT: setbe %dl 1035 ; AVX512_32_WIN-NEXT: shll $31, %edx 1036 ; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1037 ; AVX512_32_WIN-NEXT: movl (%esp), %eax 1038 ; AVX512_32_WIN-NEXT: movl %ebp, %esp 1039 ; AVX512_32_WIN-NEXT: popl %ebp 1040 ; AVX512_32_WIN-NEXT: retl 1041 ; 1042 ; AVX512_32_LIN-LABEL: x_to_u64: 1043 ; AVX512_32_LIN: # %bb.0: 1044 ; AVX512_32_LIN-NEXT: subl $12, %esp 1045 ; AVX512_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1046 ; AVX512_32_LIN-NEXT: flds {{\.LCPI.*}} 1047 ; AVX512_32_LIN-NEXT: fld %st(1) 1048 ; AVX512_32_LIN-NEXT: fsub %st(1) 1049 ; AVX512_32_LIN-NEXT: xorl %edx, %edx 1050 ; AVX512_32_LIN-NEXT: fxch %st(1) 1051 ; AVX512_32_LIN-NEXT: fucompi %st(2) 1052 ; AVX512_32_LIN-NEXT: fcmovnbe %st(1), %st(0) 1053 ; AVX512_32_LIN-NEXT: fstp %st(1) 1054 ; AVX512_32_LIN-NEXT: fisttpll (%esp) 1055 ; AVX512_32_LIN-NEXT: setbe %dl 1056 ; AVX512_32_LIN-NEXT: shll $31, %edx 1057 ; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1058 ; AVX512_32_LIN-NEXT: movl (%esp), %eax 1059 ; AVX512_32_LIN-NEXT: addl $12, %esp 1060 ; AVX512_32_LIN-NEXT: retl 1061 ; 1062 ; AVX512_64_WIN-LABEL: x_to_u64: 1063 ; AVX512_64_WIN: # %bb.0: 1064 ; AVX512_64_WIN-NEXT: pushq %rax 1065 ; AVX512_64_WIN-NEXT: fldt (%rcx) 1066 ; AVX512_64_WIN-NEXT: flds __real@{{.*}}(%rip) 1067 ; AVX512_64_WIN-NEXT: fld %st(1) 1068 ; AVX512_64_WIN-NEXT: fsub %st(1) 1069 ; AVX512_64_WIN-NEXT: xorl %ecx, %ecx 1070 ; AVX512_64_WIN-NEXT: fxch %st(1) 1071 ; AVX512_64_WIN-NEXT: fucompi %st(2) 1072 ; AVX512_64_WIN-NEXT: fcmovnbe %st(1), %st(0) 1073 ; AVX512_64_WIN-NEXT: fstp %st(1) 1074 ; AVX512_64_WIN-NEXT: fisttpll (%rsp) 1075 ; AVX512_64_WIN-NEXT: setbe %cl 1076 ; AVX512_64_WIN-NEXT: shll $31, %ecx 1077 ; AVX512_64_WIN-NEXT: xorl {{[0-9]+}}(%rsp), %ecx 1078 ; AVX512_64_WIN-NEXT: shlq $32, %rcx 1079 ; AVX512_64_WIN-NEXT: movl (%rsp), %eax 1080 ; 
AVX512_64_WIN-NEXT: orq %rcx, %rax 1081 ; AVX512_64_WIN-NEXT: popq %rcx 1082 ; AVX512_64_WIN-NEXT: retq 1083 ; 1084 ; AVX512_64_LIN-LABEL: x_to_u64: 1085 ; AVX512_64_LIN: # %bb.0: 1086 ; AVX512_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1087 ; AVX512_64_LIN-NEXT: flds {{.*}}(%rip) 1088 ; AVX512_64_LIN-NEXT: fld %st(1) 1089 ; AVX512_64_LIN-NEXT: fsub %st(1) 1090 ; AVX512_64_LIN-NEXT: xorl %ecx, %ecx 1091 ; AVX512_64_LIN-NEXT: fxch %st(1) 1092 ; AVX512_64_LIN-NEXT: fucompi %st(2) 1093 ; AVX512_64_LIN-NEXT: fcmovnbe %st(1), %st(0) 1094 ; AVX512_64_LIN-NEXT: fstp %st(1) 1095 ; AVX512_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) 1096 ; AVX512_64_LIN-NEXT: setbe %cl 1097 ; AVX512_64_LIN-NEXT: shll $31, %ecx 1098 ; AVX512_64_LIN-NEXT: xorl -{{[0-9]+}}(%rsp), %ecx 1099 ; AVX512_64_LIN-NEXT: shlq $32, %rcx 1100 ; AVX512_64_LIN-NEXT: movl -{{[0-9]+}}(%rsp), %eax 1101 ; AVX512_64_LIN-NEXT: orq %rcx, %rax 1102 ; AVX512_64_LIN-NEXT: retq 1103 ; 1104 ; SSE3_32_WIN-LABEL: x_to_u64: 1105 ; SSE3_32_WIN: # %bb.0: 1106 ; SSE3_32_WIN-NEXT: pushl %ebp 1107 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 1108 ; SSE3_32_WIN-NEXT: andl $-8, %esp 1109 ; SSE3_32_WIN-NEXT: subl $8, %esp 1110 ; SSE3_32_WIN-NEXT: fldt 8(%ebp) 1111 ; SSE3_32_WIN-NEXT: flds __real@5f000000 1112 ; SSE3_32_WIN-NEXT: fld %st(1) 1113 ; SSE3_32_WIN-NEXT: fsub %st(1) 1114 ; SSE3_32_WIN-NEXT: xorl %edx, %edx 1115 ; SSE3_32_WIN-NEXT: fxch %st(1) 1116 ; SSE3_32_WIN-NEXT: fucompi %st(2) 1117 ; SSE3_32_WIN-NEXT: fcmovnbe %st(1), %st(0) 1118 ; SSE3_32_WIN-NEXT: fstp %st(1) 1119 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 1120 ; SSE3_32_WIN-NEXT: setbe %dl 1121 ; SSE3_32_WIN-NEXT: shll $31, %edx 1122 ; SSE3_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1123 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 1124 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 1125 ; SSE3_32_WIN-NEXT: popl %ebp 1126 ; SSE3_32_WIN-NEXT: retl 1127 ; 1128 ; SSE3_32_LIN-LABEL: x_to_u64: 1129 ; SSE3_32_LIN: # %bb.0: 1130 ; SSE3_32_LIN-NEXT: subl $12, %esp 1131 ; SSE3_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1132 ; 
SSE3_32_LIN-NEXT: flds {{\.LCPI.*}} 1133 ; SSE3_32_LIN-NEXT: fld %st(1) 1134 ; SSE3_32_LIN-NEXT: fsub %st(1) 1135 ; SSE3_32_LIN-NEXT: xorl %edx, %edx 1136 ; SSE3_32_LIN-NEXT: fxch %st(1) 1137 ; SSE3_32_LIN-NEXT: fucompi %st(2) 1138 ; SSE3_32_LIN-NEXT: fcmovnbe %st(1), %st(0) 1139 ; SSE3_32_LIN-NEXT: fstp %st(1) 1140 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 1141 ; SSE3_32_LIN-NEXT: setbe %dl 1142 ; SSE3_32_LIN-NEXT: shll $31, %edx 1143 ; SSE3_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1144 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 1145 ; SSE3_32_LIN-NEXT: addl $12, %esp 1146 ; SSE3_32_LIN-NEXT: retl 1147 ; 1148 ; SSE3_64_WIN-LABEL: x_to_u64: 1149 ; SSE3_64_WIN: # %bb.0: 1150 ; SSE3_64_WIN-NEXT: subq $16, %rsp 1151 ; SSE3_64_WIN-NEXT: fldt (%rcx) 1152 ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) 1153 ; SSE3_64_WIN-NEXT: fld %st(1) 1154 ; SSE3_64_WIN-NEXT: fsub %st(1) 1155 ; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp) 1156 ; SSE3_64_WIN-NEXT: fld %st(1) 1157 ; SSE3_64_WIN-NEXT: fisttpll (%rsp) 1158 ; SSE3_64_WIN-NEXT: fucompi %st(1) 1159 ; SSE3_64_WIN-NEXT: fstp %st(0) 1160 ; SSE3_64_WIN-NEXT: jbe .LBB4_1 1161 ; SSE3_64_WIN-NEXT: # %bb.2: 1162 ; SSE3_64_WIN-NEXT: movq (%rsp), %rax 1163 ; SSE3_64_WIN-NEXT: addq $16, %rsp 1164 ; SSE3_64_WIN-NEXT: retq 1165 ; SSE3_64_WIN-NEXT: .LBB4_1: 1166 ; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1167 ; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax 1168 ; SSE3_64_WIN-NEXT: addq $16, %rsp 1169 ; SSE3_64_WIN-NEXT: retq 1170 ; 1171 ; SSE3_64_LIN-LABEL: x_to_u64: 1172 ; SSE3_64_LIN: # %bb.0: 1173 ; SSE3_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1174 ; SSE3_64_LIN-NEXT: flds {{.*}}(%rip) 1175 ; SSE3_64_LIN-NEXT: fld %st(1) 1176 ; SSE3_64_LIN-NEXT: fsub %st(1) 1177 ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) 1178 ; SSE3_64_LIN-NEXT: fld %st(1) 1179 ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) 1180 ; SSE3_64_LIN-NEXT: fucompi %st(1) 1181 ; SSE3_64_LIN-NEXT: fstp %st(0) 1182 ; SSE3_64_LIN-NEXT: jbe .LBB4_1 1183 ; 
SSE3_64_LIN-NEXT: # %bb.2: 1184 ; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1185 ; SSE3_64_LIN-NEXT: retq 1186 ; SSE3_64_LIN-NEXT: .LBB4_1: 1187 ; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1188 ; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax 1189 ; SSE3_64_LIN-NEXT: retq 1190 ; 1191 ; SSE2_32_WIN-LABEL: x_to_u64: 1192 ; SSE2_32_WIN: # %bb.0: 1193 ; SSE2_32_WIN-NEXT: pushl %ebp 1194 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 1195 ; SSE2_32_WIN-NEXT: andl $-8, %esp 1196 ; SSE2_32_WIN-NEXT: subl $16, %esp 1197 ; SSE2_32_WIN-NEXT: fldt 8(%ebp) 1198 ; SSE2_32_WIN-NEXT: flds __real@5f000000 1199 ; SSE2_32_WIN-NEXT: fld %st(1) 1200 ; SSE2_32_WIN-NEXT: fsub %st(1) 1201 ; SSE2_32_WIN-NEXT: xorl %edx, %edx 1202 ; SSE2_32_WIN-NEXT: fxch %st(1) 1203 ; SSE2_32_WIN-NEXT: fucompi %st(2) 1204 ; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st(0) 1205 ; SSE2_32_WIN-NEXT: fstp %st(1) 1206 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1207 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1208 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1209 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1210 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1211 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 1212 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1213 ; SSE2_32_WIN-NEXT: setbe %dl 1214 ; SSE2_32_WIN-NEXT: shll $31, %edx 1215 ; SSE2_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1216 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1217 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 1218 ; SSE2_32_WIN-NEXT: popl %ebp 1219 ; SSE2_32_WIN-NEXT: retl 1220 ; 1221 ; SSE2_32_LIN-LABEL: x_to_u64: 1222 ; SSE2_32_LIN: # %bb.0: 1223 ; SSE2_32_LIN-NEXT: subl $20, %esp 1224 ; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1225 ; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}} 1226 ; SSE2_32_LIN-NEXT: fld %st(1) 1227 ; SSE2_32_LIN-NEXT: fsub %st(1) 1228 ; SSE2_32_LIN-NEXT: xorl %edx, %edx 1229 ; SSE2_32_LIN-NEXT: fxch %st(1) 1230 ; SSE2_32_LIN-NEXT: fucompi %st(2) 1231 ; SSE2_32_LIN-NEXT: fcmovnbe %st(1), 
%st(0) 1232 ; SSE2_32_LIN-NEXT: fstp %st(1) 1233 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1234 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1235 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1236 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1237 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1238 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 1239 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1240 ; SSE2_32_LIN-NEXT: setbe %dl 1241 ; SSE2_32_LIN-NEXT: shll $31, %edx 1242 ; SSE2_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1243 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1244 ; SSE2_32_LIN-NEXT: addl $20, %esp 1245 ; SSE2_32_LIN-NEXT: retl 1246 ; 1247 ; SSE2_64_WIN-LABEL: x_to_u64: 1248 ; SSE2_64_WIN: # %bb.0: 1249 ; SSE2_64_WIN-NEXT: subq $24, %rsp 1250 ; SSE2_64_WIN-NEXT: fldt (%rcx) 1251 ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) 1252 ; SSE2_64_WIN-NEXT: fld %st(1) 1253 ; SSE2_64_WIN-NEXT: fsub %st(1) 1254 ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) 1255 ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax 1256 ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F 1257 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1258 ; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) 1259 ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) 1260 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1261 ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) 1262 ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax 1263 ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F 1264 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1265 ; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) 1266 ; SSE2_64_WIN-NEXT: fld %st(1) 1267 ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) 1268 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1269 ; SSE2_64_WIN-NEXT: fucompi %st(1) 1270 ; SSE2_64_WIN-NEXT: fstp %st(0) 1271 ; SSE2_64_WIN-NEXT: jbe .LBB4_1 1272 ; SSE2_64_WIN-NEXT: # %bb.2: 1273 ; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax 1274 ; SSE2_64_WIN-NEXT: addq $24, %rsp 1275 ; SSE2_64_WIN-NEXT: retq 1276 ; 
SSE2_64_WIN-NEXT: .LBB4_1: 1277 ; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1278 ; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax 1279 ; SSE2_64_WIN-NEXT: addq $24, %rsp 1280 ; SSE2_64_WIN-NEXT: retq 1281 ; 1282 ; SSE2_64_LIN-LABEL: x_to_u64: 1283 ; SSE2_64_LIN: # %bb.0: 1284 ; SSE2_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1285 ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) 1286 ; SSE2_64_LIN-NEXT: fld %st(1) 1287 ; SSE2_64_LIN-NEXT: fsub %st(1) 1288 ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) 1289 ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 1290 ; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F 1291 ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1292 ; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 1293 ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) 1294 ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1295 ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) 1296 ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 1297 ; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F 1298 ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1299 ; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 1300 ; SSE2_64_LIN-NEXT: fld %st(1) 1301 ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) 1302 ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1303 ; SSE2_64_LIN-NEXT: fucompi %st(1) 1304 ; SSE2_64_LIN-NEXT: fstp %st(0) 1305 ; SSE2_64_LIN-NEXT: jbe .LBB4_1 1306 ; SSE2_64_LIN-NEXT: # %bb.2: 1307 ; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1308 ; SSE2_64_LIN-NEXT: retq 1309 ; SSE2_64_LIN-NEXT: .LBB4_1: 1310 ; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 1311 ; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax 1312 ; SSE2_64_LIN-NEXT: retq 1313 ; 1314 ; X87_WIN-LABEL: x_to_u64: 1315 ; X87_WIN: # %bb.0: 1316 ; X87_WIN-NEXT: pushl %ebp 1317 ; X87_WIN-NEXT: movl %esp, %ebp 1318 ; X87_WIN-NEXT: andl $-8, %esp 1319 ; X87_WIN-NEXT: subl $16, %esp 1320 ; X87_WIN-NEXT: fldt 8(%ebp) 1321 ; X87_WIN-NEXT: flds __real@5f000000 1322 ; 
X87_WIN-NEXT: fld %st(1) 1323 ; X87_WIN-NEXT: fsub %st(1) 1324 ; X87_WIN-NEXT: fxch %st(1) 1325 ; X87_WIN-NEXT: fucomp %st(2) 1326 ; X87_WIN-NEXT: fnstsw %ax 1327 ; X87_WIN-NEXT: # kill: def $ah killed $ah killed $ax 1328 ; X87_WIN-NEXT: sahf 1329 ; X87_WIN-NEXT: ja LBB4_2 1330 ; X87_WIN-NEXT: # %bb.1: 1331 ; X87_WIN-NEXT: fstp %st(1) 1332 ; X87_WIN-NEXT: fldz 1333 ; X87_WIN-NEXT: LBB4_2: 1334 ; X87_WIN-NEXT: fstp %st(0) 1335 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1336 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1337 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1338 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1339 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1340 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 1341 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1342 ; X87_WIN-NEXT: setbe %al 1343 ; X87_WIN-NEXT: movzbl %al, %edx 1344 ; X87_WIN-NEXT: shll $31, %edx 1345 ; X87_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1346 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1347 ; X87_WIN-NEXT: movl %ebp, %esp 1348 ; X87_WIN-NEXT: popl %ebp 1349 ; X87_WIN-NEXT: retl 1350 ; 1351 ; X87_LIN-LABEL: x_to_u64: 1352 ; X87_LIN: # %bb.0: 1353 ; X87_LIN-NEXT: subl $20, %esp 1354 ; X87_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1355 ; X87_LIN-NEXT: flds {{\.LCPI.*}} 1356 ; X87_LIN-NEXT: fld %st(1) 1357 ; X87_LIN-NEXT: fsub %st(1) 1358 ; X87_LIN-NEXT: fxch %st(1) 1359 ; X87_LIN-NEXT: fucomp %st(2) 1360 ; X87_LIN-NEXT: fnstsw %ax 1361 ; X87_LIN-NEXT: # kill: def $ah killed $ah killed $ax 1362 ; X87_LIN-NEXT: sahf 1363 ; X87_LIN-NEXT: ja .LBB4_2 1364 ; X87_LIN-NEXT: # %bb.1: 1365 ; X87_LIN-NEXT: fstp %st(1) 1366 ; X87_LIN-NEXT: fldz 1367 ; X87_LIN-NEXT: .LBB4_2: 1368 ; X87_LIN-NEXT: fstp %st(0) 1369 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1370 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1371 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1372 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1373 ; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1374 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 1375 ; X87_LIN-NEXT: 
fldcw {{[0-9]+}}(%esp) 1376 ; X87_LIN-NEXT: setbe %al 1377 ; X87_LIN-NEXT: movzbl %al, %edx 1378 ; X87_LIN-NEXT: shll $31, %edx 1379 ; X87_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx 1380 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1381 ; X87_LIN-NEXT: addl $20, %esp 1382 ; X87_LIN-NEXT: retl 1383 %r = fptoui x86_fp80 %a to i64 1384 ret i64 %r 1385 } 1386
; x_to_s64 converts an x86_fp80 value to a signed 64-bit integer with a single
; fptosi. Every expected sequence below loads the operand with fldt and stores
; the integer from the x87 stack:
;   - the AVX512 and SSE3 prefixes expect one fisttpll;
;   - the SSE2 and X87 prefixes expect fistpll bracketed by fnstcw/fldcw, with
;     the temporary control word set to 0xC7F (presumably selecting truncation;
;     confirm against the x87 control-word layout).
; The 64-bit outputs reload the result into rax; the 32-bit outputs reload it
; into the eax/edx pair.
; Do not hand-edit the expectations; regenerate them with
; utils/update_llc_test_checks.py.
1387 define i64 @x_to_s64(x86_fp80 %a) nounwind { 1388 ; AVX512_32_WIN-LABEL: x_to_s64: 1389 ; AVX512_32_WIN: # %bb.0: 1390 ; AVX512_32_WIN-NEXT: pushl %ebp 1391 ; AVX512_32_WIN-NEXT: movl %esp, %ebp 1392 ; AVX512_32_WIN-NEXT: andl $-8, %esp 1393 ; AVX512_32_WIN-NEXT: subl $8, %esp 1394 ; AVX512_32_WIN-NEXT: fldt 8(%ebp) 1395 ; AVX512_32_WIN-NEXT: fisttpll (%esp) 1396 ; AVX512_32_WIN-NEXT: movl (%esp), %eax 1397 ; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1398 ; AVX512_32_WIN-NEXT: movl %ebp, %esp 1399 ; AVX512_32_WIN-NEXT: popl %ebp 1400 ; AVX512_32_WIN-NEXT: retl 1401 ; 1402 ; AVX512_32_LIN-LABEL: x_to_s64: 1403 ; AVX512_32_LIN: # %bb.0: 1404 ; AVX512_32_LIN-NEXT: subl $12, %esp 1405 ; AVX512_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1406 ; AVX512_32_LIN-NEXT: fisttpll (%esp) 1407 ; AVX512_32_LIN-NEXT: movl (%esp), %eax 1408 ; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1409 ; AVX512_32_LIN-NEXT: addl $12, %esp 1410 ; AVX512_32_LIN-NEXT: retl 1411 ; 1412 ; AVX512_64_WIN-LABEL: x_to_s64: 1413 ; AVX512_64_WIN: # %bb.0: 1414 ; AVX512_64_WIN-NEXT: pushq %rax 1415 ; AVX512_64_WIN-NEXT: fldt (%rcx) 1416 ; AVX512_64_WIN-NEXT: fisttpll (%rsp) 1417 ; AVX512_64_WIN-NEXT: movq (%rsp), %rax 1418 ; AVX512_64_WIN-NEXT: popq %rcx 1419 ; AVX512_64_WIN-NEXT: retq 1420 ; 1421 ; AVX512_64_LIN-LABEL: x_to_s64: 1422 ; AVX512_64_LIN: # %bb.0: 1423 ; AVX512_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1424 ; AVX512_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) 1425 ; AVX512_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1426 ; AVX512_64_LIN-NEXT: retq 1427 ; 1428 ; SSE3_32_WIN-LABEL: x_to_s64: 1429 ; SSE3_32_WIN: # %bb.0: 1430 ; SSE3_32_WIN-NEXT: pushl %ebp 
1431 ; SSE3_32_WIN-NEXT: movl %esp, %ebp 1432 ; SSE3_32_WIN-NEXT: andl $-8, %esp 1433 ; SSE3_32_WIN-NEXT: subl $8, %esp 1434 ; SSE3_32_WIN-NEXT: fldt 8(%ebp) 1435 ; SSE3_32_WIN-NEXT: fisttpll (%esp) 1436 ; SSE3_32_WIN-NEXT: movl (%esp), %eax 1437 ; SSE3_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1438 ; SSE3_32_WIN-NEXT: movl %ebp, %esp 1439 ; SSE3_32_WIN-NEXT: popl %ebp 1440 ; SSE3_32_WIN-NEXT: retl 1441 ; 1442 ; SSE3_32_LIN-LABEL: x_to_s64: 1443 ; SSE3_32_LIN: # %bb.0: 1444 ; SSE3_32_LIN-NEXT: subl $12, %esp 1445 ; SSE3_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1446 ; SSE3_32_LIN-NEXT: fisttpll (%esp) 1447 ; SSE3_32_LIN-NEXT: movl (%esp), %eax 1448 ; SSE3_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1449 ; SSE3_32_LIN-NEXT: addl $12, %esp 1450 ; SSE3_32_LIN-NEXT: retl 1451 ; 1452 ; SSE3_64_WIN-LABEL: x_to_s64: 1453 ; SSE3_64_WIN: # %bb.0: 1454 ; SSE3_64_WIN-NEXT: pushq %rax 1455 ; SSE3_64_WIN-NEXT: fldt (%rcx) 1456 ; SSE3_64_WIN-NEXT: fisttpll (%rsp) 1457 ; SSE3_64_WIN-NEXT: movq (%rsp), %rax 1458 ; SSE3_64_WIN-NEXT: popq %rcx 1459 ; SSE3_64_WIN-NEXT: retq 1460 ; 1461 ; SSE3_64_LIN-LABEL: x_to_s64: 1462 ; SSE3_64_LIN: # %bb.0: 1463 ; SSE3_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1464 ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) 1465 ; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1466 ; SSE3_64_LIN-NEXT: retq 1467 ; 1468 ; SSE2_32_WIN-LABEL: x_to_s64: 1469 ; SSE2_32_WIN: # %bb.0: 1470 ; SSE2_32_WIN-NEXT: pushl %ebp 1471 ; SSE2_32_WIN-NEXT: movl %esp, %ebp 1472 ; SSE2_32_WIN-NEXT: andl $-8, %esp 1473 ; SSE2_32_WIN-NEXT: subl $16, %esp 1474 ; SSE2_32_WIN-NEXT: fldt 8(%ebp) 1475 ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1476 ; SSE2_32_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1477 ; SSE2_32_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1478 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1479 ; SSE2_32_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1480 ; SSE2_32_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 1481 ; SSE2_32_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1482 ; SSE2_32_WIN-NEXT: movl 
{{[0-9]+}}(%esp), %eax 1483 ; SSE2_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1484 ; SSE2_32_WIN-NEXT: movl %ebp, %esp 1485 ; SSE2_32_WIN-NEXT: popl %ebp 1486 ; SSE2_32_WIN-NEXT: retl 1487 ; 1488 ; SSE2_32_LIN-LABEL: x_to_s64: 1489 ; SSE2_32_LIN: # %bb.0: 1490 ; SSE2_32_LIN-NEXT: subl $20, %esp 1491 ; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1492 ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1493 ; SSE2_32_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1494 ; SSE2_32_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1495 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1496 ; SSE2_32_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1497 ; SSE2_32_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 1498 ; SSE2_32_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1499 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1500 ; SSE2_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1501 ; SSE2_32_LIN-NEXT: addl $20, %esp 1502 ; SSE2_32_LIN-NEXT: retl 1503 ; 1504 ; SSE2_64_WIN-LABEL: x_to_s64: 1505 ; SSE2_64_WIN: # %bb.0: 1506 ; SSE2_64_WIN-NEXT: subq $16, %rsp 1507 ; SSE2_64_WIN-NEXT: fldt (%rcx) 1508 ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) 1509 ; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax 1510 ; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F 1511 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1512 ; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp) 1513 ; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp) 1514 ; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp) 1515 ; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax 1516 ; SSE2_64_WIN-NEXT: addq $16, %rsp 1517 ; SSE2_64_WIN-NEXT: retq 1518 ; 1519 ; SSE2_64_LIN-LABEL: x_to_s64: 1520 ; SSE2_64_LIN: # %bb.0: 1521 ; SSE2_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) 1522 ; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp) 1523 ; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 1524 ; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F 1525 ; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1526 ; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 1527 ; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp) 1528 ; 
SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp) 1529 ; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1530 ; SSE2_64_LIN-NEXT: retq 1531 ; 1532 ; X87_WIN-LABEL: x_to_s64: 1533 ; X87_WIN: # %bb.0: 1534 ; X87_WIN-NEXT: pushl %ebp 1535 ; X87_WIN-NEXT: movl %esp, %ebp 1536 ; X87_WIN-NEXT: andl $-8, %esp 1537 ; X87_WIN-NEXT: subl $16, %esp 1538 ; X87_WIN-NEXT: fldt 8(%ebp) 1539 ; X87_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1540 ; X87_WIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1541 ; X87_WIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1542 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1543 ; X87_WIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1544 ; X87_WIN-NEXT: fistpll {{[0-9]+}}(%esp) 1545 ; X87_WIN-NEXT: fldcw {{[0-9]+}}(%esp) 1546 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1547 ; X87_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1548 ; X87_WIN-NEXT: movl %ebp, %esp 1549 ; X87_WIN-NEXT: popl %ebp 1550 ; X87_WIN-NEXT: retl 1551 ; 1552 ; X87_LIN-LABEL: x_to_s64: 1553 ; X87_LIN: # %bb.0: 1554 ; X87_LIN-NEXT: subl $20, %esp 1555 ; X87_LIN-NEXT: fldt {{[0-9]+}}(%esp) 1556 ; X87_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) 1557 ; X87_LIN-NEXT: movzwl {{[0-9]+}}(%esp), %eax 1558 ; X87_LIN-NEXT: movw $3199, {{[0-9]+}}(%esp) # imm = 0xC7F 1559 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1560 ; X87_LIN-NEXT: movw %ax, {{[0-9]+}}(%esp) 1561 ; X87_LIN-NEXT: fistpll {{[0-9]+}}(%esp) 1562 ; X87_LIN-NEXT: fldcw {{[0-9]+}}(%esp) 1563 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %eax 1564 ; X87_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx 1565 ; X87_LIN-NEXT: addl $20, %esp 1566 ; X87_LIN-NEXT: retl 1567 %r = fptosi x86_fp80 %a to i64 1568 ret i64 %r 1569 } 1570 1571 define i64 @t_to_u64(fp128 %a) nounwind { 1572 ; AVX512_32_WIN-LABEL: t_to_u64: 1573 ; AVX512_32_WIN: # %bb.0: 1574 ; AVX512_32_WIN-NEXT: subl $16, %esp 1575 ; AVX512_32_WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 1576 ; AVX512_32_WIN-NEXT: vmovups %xmm0, (%esp) 1577 ; AVX512_32_WIN-NEXT: calll ___fixunstfdi 1578 ; AVX512_32_WIN-NEXT: addl $16, %esp 1579 ; AVX512_32_WIN-NEXT: retl 1580 
; 1581 ; AVX512_32_LIN-LABEL: t_to_u64: 1582 ; AVX512_32_LIN: # %bb.0: 1583 ; AVX512_32_LIN-NEXT: subl $28, %esp 1584 ; AVX512_32_LIN-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 1585 ; AVX512_32_LIN-NEXT: vmovups %xmm0, (%esp) 1586 ; AVX512_32_LIN-NEXT: calll __fixunstfdi 1587 ; AVX512_32_LIN-NEXT: addl $28, %esp 1588 ; AVX512_32_LIN-NEXT: retl 1589 ; 1590 ; AVX512_64_WIN-LABEL: t_to_u64: 1591 ; AVX512_64_WIN: # %bb.0: 1592 ; AVX512_64_WIN-NEXT: subq $40, %rsp 1593 ; AVX512_64_WIN-NEXT: callq __fixunstfdi 1594 ; AVX512_64_WIN-NEXT: addq $40, %rsp 1595 ; AVX512_64_WIN-NEXT: retq 1596 ; 1597 ; AVX512_64_LIN-LABEL: t_to_u64: 1598 ; AVX512_64_LIN: # %bb.0: 1599 ; AVX512_64_LIN-NEXT: pushq %rax 1600 ; AVX512_64_LIN-NEXT: callq __fixunstfdi 1601 ; AVX512_64_LIN-NEXT: popq %rcx 1602 ; AVX512_64_LIN-NEXT: retq 1603 ; 1604 ; SSE3_32_WIN-LABEL: t_to_u64: 1605 ; SSE3_32_WIN: # %bb.0: 1606 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1607 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1608 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1609 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1610 ; SSE3_32_WIN-NEXT: calll ___fixunstfdi 1611 ; SSE3_32_WIN-NEXT: addl $16, %esp 1612 ; SSE3_32_WIN-NEXT: retl 1613 ; 1614 ; SSE3_32_LIN-LABEL: t_to_u64: 1615 ; SSE3_32_LIN: # %bb.0: 1616 ; SSE3_32_LIN-NEXT: subl $12, %esp 1617 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1618 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1619 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1620 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1621 ; SSE3_32_LIN-NEXT: calll __fixunstfdi 1622 ; SSE3_32_LIN-NEXT: addl $28, %esp 1623 ; SSE3_32_LIN-NEXT: retl 1624 ; 1625 ; SSE3_64_WIN-LABEL: t_to_u64: 1626 ; SSE3_64_WIN: # %bb.0: 1627 ; SSE3_64_WIN-NEXT: subq $40, %rsp 1628 ; SSE3_64_WIN-NEXT: callq __fixunstfdi 1629 ; SSE3_64_WIN-NEXT: addq $40, %rsp 1630 ; SSE3_64_WIN-NEXT: retq 1631 ; 1632 ; SSE3_64_LIN-LABEL: t_to_u64: 1633 ; SSE3_64_LIN: # %bb.0: 1634 ; SSE3_64_LIN-NEXT: pushq %rax 1635 ; SSE3_64_LIN-NEXT: callq __fixunstfdi 1636 ; 
SSE3_64_LIN-NEXT: popq %rcx 1637 ; SSE3_64_LIN-NEXT: retq 1638 ; 1639 ; SSE2_32_WIN-LABEL: t_to_u64: 1640 ; SSE2_32_WIN: # %bb.0: 1641 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1642 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1643 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1644 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1645 ; SSE2_32_WIN-NEXT: calll ___fixunstfdi 1646 ; SSE2_32_WIN-NEXT: addl $16, %esp 1647 ; SSE2_32_WIN-NEXT: retl 1648 ; 1649 ; SSE2_32_LIN-LABEL: t_to_u64: 1650 ; SSE2_32_LIN: # %bb.0: 1651 ; SSE2_32_LIN-NEXT: subl $12, %esp 1652 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1653 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1654 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1655 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1656 ; SSE2_32_LIN-NEXT: calll __fixunstfdi 1657 ; SSE2_32_LIN-NEXT: addl $28, %esp 1658 ; SSE2_32_LIN-NEXT: retl 1659 ; 1660 ; SSE2_64_WIN-LABEL: t_to_u64: 1661 ; SSE2_64_WIN: # %bb.0: 1662 ; SSE2_64_WIN-NEXT: subq $40, %rsp 1663 ; SSE2_64_WIN-NEXT: callq __fixunstfdi 1664 ; SSE2_64_WIN-NEXT: addq $40, %rsp 1665 ; SSE2_64_WIN-NEXT: retq 1666 ; 1667 ; SSE2_64_LIN-LABEL: t_to_u64: 1668 ; SSE2_64_LIN: # %bb.0: 1669 ; SSE2_64_LIN-NEXT: pushq %rax 1670 ; SSE2_64_LIN-NEXT: callq __fixunstfdi 1671 ; SSE2_64_LIN-NEXT: popq %rcx 1672 ; SSE2_64_LIN-NEXT: retq 1673 ; 1674 ; X87_WIN-LABEL: t_to_u64: 1675 ; X87_WIN: # %bb.0: 1676 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1677 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1678 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1679 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1680 ; X87_WIN-NEXT: calll ___fixunstfdi 1681 ; X87_WIN-NEXT: addl $16, %esp 1682 ; X87_WIN-NEXT: retl 1683 ; 1684 ; X87_LIN-LABEL: t_to_u64: 1685 ; X87_LIN: # %bb.0: 1686 ; X87_LIN-NEXT: subl $12, %esp 1687 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1688 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1689 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1690 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1691 ; X87_LIN-NEXT: calll __fixunstfdi 1692 ; X87_LIN-NEXT: addl $28, %esp 1693 ; 
X87_LIN-NEXT: retl 1694 %r = fptoui fp128 %a to i64 1695 ret i64 %r 1696 } 1697 ; Signed conversion of an fp128 argument to i64 via fptosi. In every check group recorded for this function the conversion is not expanded inline but becomes a call to __fixtfdi (the symbol appears with one extra leading underscore in the Windows groups that use the esp-based stack). The groups using esp place the sixteen-byte argument on the stack before the call, while the groups using rsp show only a stack adjustment around the call. The assertions are autogenerated, so regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 1698 define i64 @t_to_s64(fp128 %a) nounwind { 1699 ; AVX512_32_WIN-LABEL: t_to_s64: 1700 ; AVX512_32_WIN: # %bb.0: 1701 ; AVX512_32_WIN-NEXT: subl $16, %esp 1702 ; AVX512_32_WIN-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0 1703 ; AVX512_32_WIN-NEXT: vmovups %xmm0, (%esp) 1704 ; AVX512_32_WIN-NEXT: calll ___fixtfdi 1705 ; AVX512_32_WIN-NEXT: addl $16, %esp 1706 ; AVX512_32_WIN-NEXT: retl 1707 ; 1708 ; AVX512_32_LIN-LABEL: t_to_s64: 1709 ; AVX512_32_LIN: # %bb.0: 1710 ; AVX512_32_LIN-NEXT: subl $28, %esp 1711 ; AVX512_32_LIN-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 1712 ; AVX512_32_LIN-NEXT: vmovups %xmm0, (%esp) 1713 ; AVX512_32_LIN-NEXT: calll __fixtfdi 1714 ; AVX512_32_LIN-NEXT: addl $28, %esp 1715 ; AVX512_32_LIN-NEXT: retl 1716 ; 1717 ; AVX512_64_WIN-LABEL: t_to_s64: 1718 ; AVX512_64_WIN: # %bb.0: 1719 ; AVX512_64_WIN-NEXT: subq $40, %rsp 1720 ; AVX512_64_WIN-NEXT: callq __fixtfdi 1721 ; AVX512_64_WIN-NEXT: addq $40, %rsp 1722 ; AVX512_64_WIN-NEXT: retq 1723 ; 1724 ; AVX512_64_LIN-LABEL: t_to_s64: 1725 ; AVX512_64_LIN: # %bb.0: 1726 ; AVX512_64_LIN-NEXT: pushq %rax 1727 ; AVX512_64_LIN-NEXT: callq __fixtfdi 1728 ; AVX512_64_LIN-NEXT: popq %rcx 1729 ; AVX512_64_LIN-NEXT: retq 1730 ; 1731 ; SSE3_32_WIN-LABEL: t_to_s64: 1732 ; SSE3_32_WIN: # %bb.0: 1733 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1734 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1735 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1736 ; SSE3_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1737 ; SSE3_32_WIN-NEXT: calll ___fixtfdi 1738 ; SSE3_32_WIN-NEXT: addl $16, %esp 1739 ; SSE3_32_WIN-NEXT: retl 1740 ; 1741 ; SSE3_32_LIN-LABEL: t_to_s64: 1742 ; SSE3_32_LIN: # %bb.0: 1743 ; SSE3_32_LIN-NEXT: subl $12, %esp 1744 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1745 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1746 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1747 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1748 ; SSE3_32_LIN-NEXT: calll __fixtfdi 1749 ; 
SSE3_32_LIN-NEXT: addl $28, %esp 1750 ; SSE3_32_LIN-NEXT: retl 1751 ; 1752 ; SSE3_64_WIN-LABEL: t_to_s64: 1753 ; SSE3_64_WIN: # %bb.0: 1754 ; SSE3_64_WIN-NEXT: subq $40, %rsp 1755 ; SSE3_64_WIN-NEXT: callq __fixtfdi 1756 ; SSE3_64_WIN-NEXT: addq $40, %rsp 1757 ; SSE3_64_WIN-NEXT: retq 1758 ; 1759 ; SSE3_64_LIN-LABEL: t_to_s64: 1760 ; SSE3_64_LIN: # %bb.0: 1761 ; SSE3_64_LIN-NEXT: pushq %rax 1762 ; SSE3_64_LIN-NEXT: callq __fixtfdi 1763 ; SSE3_64_LIN-NEXT: popq %rcx 1764 ; SSE3_64_LIN-NEXT: retq 1765 ; 1766 ; SSE2_32_WIN-LABEL: t_to_s64: 1767 ; SSE2_32_WIN: # %bb.0: 1768 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1769 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1770 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1771 ; SSE2_32_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1772 ; SSE2_32_WIN-NEXT: calll ___fixtfdi 1773 ; SSE2_32_WIN-NEXT: addl $16, %esp 1774 ; SSE2_32_WIN-NEXT: retl 1775 ; 1776 ; SSE2_32_LIN-LABEL: t_to_s64: 1777 ; SSE2_32_LIN: # %bb.0: 1778 ; SSE2_32_LIN-NEXT: subl $12, %esp 1779 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1780 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1781 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1782 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1783 ; SSE2_32_LIN-NEXT: calll __fixtfdi 1784 ; SSE2_32_LIN-NEXT: addl $28, %esp 1785 ; SSE2_32_LIN-NEXT: retl 1786 ; 1787 ; SSE2_64_WIN-LABEL: t_to_s64: 1788 ; SSE2_64_WIN: # %bb.0: 1789 ; SSE2_64_WIN-NEXT: subq $40, %rsp 1790 ; SSE2_64_WIN-NEXT: callq __fixtfdi 1791 ; SSE2_64_WIN-NEXT: addq $40, %rsp 1792 ; SSE2_64_WIN-NEXT: retq 1793 ; 1794 ; SSE2_64_LIN-LABEL: t_to_s64: 1795 ; SSE2_64_LIN: # %bb.0: 1796 ; SSE2_64_LIN-NEXT: pushq %rax 1797 ; SSE2_64_LIN-NEXT: callq __fixtfdi 1798 ; SSE2_64_LIN-NEXT: popq %rcx 1799 ; SSE2_64_LIN-NEXT: retq 1800 ; 1801 ; X87_WIN-LABEL: t_to_s64: 1802 ; X87_WIN: # %bb.0: 1803 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1804 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1805 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1806 ; X87_WIN-NEXT: pushl {{[0-9]+}}(%esp) 1807 ; X87_WIN-NEXT: calll 
___fixtfdi 1808 ; X87_WIN-NEXT: addl $16, %esp 1809 ; X87_WIN-NEXT: retl 1810 ; 1811 ; X87_LIN-LABEL: t_to_s64: 1812 ; X87_LIN: # %bb.0: 1813 ; X87_LIN-NEXT: subl $12, %esp 1814 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1815 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1816 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1817 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp) 1818 ; X87_LIN-NEXT: calll __fixtfdi 1819 ; X87_LIN-NEXT: addl $28, %esp 1820 ; X87_LIN-NEXT: retl 1821 %r = fptosi fp128 %a to i64 1822 ret i64 %r 1823 } 1824