; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2

;
; Double to Signed Integer
;

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    # kill
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

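; Note: without AVX512DQ there is no packed fp64 -> i64 conversion instruction,
; so the i64 tests in this file expect each lane to be truncated individually
; with scalar cvttsd2si and then repacked.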
define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;

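; fptoui has no direct instruction here either; the expected lowering is the
; usual two-range trick: inputs below 2^63 go straight through the signed
; cvttsd2si, inputs at or above 2^63 are converted after subtracting 2^63 and
; have the sign bit (0x8000000000000000) XORed back in, with ucomisd + cmovae
; selecting between the two results per lane.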
define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    subsd %xmm2, %xmm1
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttsd2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

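; Lanes 2 and 3 of the widened input are undef here, so the instructions the
; checks pin down for those lanes only reflect current codegen, not anything
; semantically required.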
define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    subsd %xmm1, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm1, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    ucomisd %xmm1, %xmm0
; SSE-NEXT:    cmovbq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movapd %xmm0, %xmm2
; SSE-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT:    subsd %xmm3, %xmm0
; SSE-NEXT:    cvttsd2si %xmm0, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    movapd %xmm2, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm2, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm2
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT:    movapd %xmm1, %xmm2
; SSE-NEXT:    subsd %xmm3, %xmm2
; SSE-NEXT:    cvttsd2si %xmm2, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm3, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rax
; SSE-NEXT:    ucomisd %xmm3, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm2, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttsd2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rdx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttsd2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vucomisd %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movapd %xmm1, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    movapd %xmm1, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm1, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm1
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT:    movapd %xmm0, %xmm3
; SSE-NEXT:    subsd %xmm2, %xmm3
; SSE-NEXT:    cvttsd2si %xmm3, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rdx
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subsd %xmm2, %xmm4
; SSE-NEXT:    cvttsd2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttsd2si %xmm0, %rax
; SSE-NEXT:    ucomisd %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm1, %rax
; AVX-NEXT:    vcvttsd2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Float to Signed Integer
;

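; cvttps2dq covers packed f32 -> i32 directly; f32 -> i64 still has to be
; scalarized through cvttss2si.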
define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %rax, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vmovq %rax, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;

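; f32 -> u32 is done per lane as a 64-bit cvttss2si followed by an implicit
; truncation to 32 bits, which is exact for the whole u32 range, so no
; compare/select sequence is needed.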
define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm1
; AVX-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT:    retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

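; Like the f64 cases above, f32 -> u64 needs the full 2^63 subtract/xor/cmovae
; sequence per lane, just with the ss instruction forms.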
define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    subss %xmm2, %xmm1
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rdx
; SSE-NEXT:    movd %rdx, %xmm1
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    subss %xmm2, %xmm3
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    xorq %rcx, %rax
; SSE-NEXT:    cvttss2si %xmm0, %rcx
; SSE-NEXT:    ucomiss %xmm2, %xmm0
; SSE-NEXT:    cmovaeq %rax, %rcx
; SSE-NEXT:    movd %rcx, %xmm0
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX:       # BB#0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rdx
; AVX-NEXT:    vucomiss %xmm2, %xmm1
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm2, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vmovq %rdx, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

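; With AVX1 the 8 x f32 -> u32 case is split into two xmm halves, each half
; rebuilt lane by lane with vmovd/vpinsrd before vinsertf128 reassembles the
; ymm result.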
define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE:       # BB#0:
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT:    movaps %xmm1, %xmm2
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT:    cvttss2si %xmm2, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    cvttss2si %xmm3, %rax
; SSE-NEXT:    movd %eax, %xmm3
; SSE-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm2
; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT:    cvttss2si %xmm1, %rax
; SSE-NEXT:    movd %eax, %xmm1
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX:       # BB#0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm1, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm1, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT:    vcvttss2si %xmm2, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vmovd %ecx, %xmm2
; AVX-NEXT:    vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT:    vcvttss2si %xmm0, %rax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    subss %xmm1, %xmm2
; SSE-NEXT:    cvttss2si %xmm2, %rcx
; SSE-NEXT:    movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm2
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    movaps %xmm3, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm3, %rdx
; SSE-NEXT:    ucomiss %xmm1, %xmm3
; SSE-NEXT:    cmovaeq %rcx, %rdx
; SSE-NEXT:    movd %rdx, %xmm3
; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT:    movapd %xmm0, %xmm4
; SSE-NEXT:    subss %xmm1, %xmm4
; SSE-NEXT:    cvttss2si %xmm4, %rcx
; SSE-NEXT:    xorq %rax, %rcx
; SSE-NEXT:    cvttss2si %xmm0, %rax
; SSE-NEXT:    ucomiss %xmm1, %xmm0
; SSE-NEXT:    cmovaeq %rcx, %rax
; SSE-NEXT:    movd %rax, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX:       # BB#0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm2, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm2
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm2
; AVX-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT:    vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm3, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm3
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT:    vcvttss2si %xmm3, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rdx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm3
; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT:    vcvttss2si %xmm4, %rax
; AVX-NEXT:    xorq %rcx, %rax
; AVX-NEXT:    vcvttss2si %xmm0, %rcx
; AVX-NEXT:    vucomiss %xmm1, %xmm0
; AVX-NEXT:    cmovaeq %rax, %rcx
; AVX-NEXT:    vmovq %rcx, %xmm0
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;

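; All of the conversions below should constant-fold; the expected code is just
; a (v)movaps load of the folded vector from the constant pool.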
define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT:    retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT:    retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT:    retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT:    retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT:    retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX:       # BB#0:
; AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT:    retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}