; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
;
; 32-bit tests to make sure we're not doing anything stupid.
; RUN: llc < %s -mtriple=i686-unknown-unknown
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2

;
; Double to Signed Integer
;
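; SSE2/AVX1 have packed truncating conversions to i32 (cvttpd2dq/cvttps2dq),
; but no packed fp-to-i64 instruction before AVX512DQ, so the 64-bit element
; cases below are expected to be scalarized through cvttsd2si and reassembled
; with shuffles.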

define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE: # BB#0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE: # BB#0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptosi <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vcvttsd2si %xmm1, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX-NEXT: vcvttsd2si %xmm1, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE: # BB#0:
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Double to Unsigned Integer
;
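; There are no unsigned fp-to-int conversion instructions before AVX512, so
; each lane is lowered with the standard trick: convert (x - 2^63) and flip
; the sign bit back with an xor of 0x8000000000000000, then use ucomisd plus
; cmovae to pick the adjusted result for x >= 2^63 and the plain signed
; cvttsd2si result otherwise.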

define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: subsd %xmm2, %xmm1
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rdx
; SSE-NEXT: ucomisd %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: subsd %xmm2, %xmm3
; SSE-NEXT: cvttsd2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rcx
; SSE-NEXT: ucomisd %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT: vcvttsd2si %xmm2, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rdx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttsd2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rcx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> %a to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: subsd %xmm1, %xmm2
; SSE-NEXT: cvttsd2si %xmm2, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rdx
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: subsd %xmm1, %xmm3
; SSE-NEXT: cvttsd2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rcx
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm2
; AVX-NEXT: vcvttsd2si %xmm2, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rdx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttsd2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rcx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> %a to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_2i32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: subsd %xmm1, %xmm2
; SSE-NEXT: cvttsd2si %xmm2, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rdx
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: subsd %xmm1, %xmm3
; SSE-NEXT: cvttsd2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttsd2si %xmm0, %rdx
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: ucomisd %xmm1, %xmm0
; SSE-NEXT: cmovbq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm1, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rcx
; AVX-NEXT: vmovd %ecx, %xmm0
; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0
; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
  %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %cvt = fptoui <4 x double> %ext to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; SSE-NEXT: subsd %xmm3, %xmm0
; SSE-NEXT: cvttsd2si %xmm0, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm2, %rdx
; SSE-NEXT: ucomisd %xmm3, %xmm2
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT: movapd %xmm2, %xmm4
; SSE-NEXT: subsd %xmm3, %xmm4
; SSE-NEXT: cvttsd2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm2, %rdx
; SSE-NEXT: ucomisd %xmm3, %xmm2
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: movapd %xmm1, %xmm2
; SSE-NEXT: subsd %xmm3, %xmm2
; SSE-NEXT: cvttsd2si %xmm2, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm1, %rdx
; SSE-NEXT: ucomisd %xmm3, %xmm1
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: movapd %xmm1, %xmm4
; SSE-NEXT: subsd %xmm3, %xmm4
; SSE-NEXT: cvttsd2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm1, %rax
; SSE-NEXT: ucomisd %xmm3, %xmm1
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm3
; AVX-NEXT: vcvttsd2si %xmm3, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm2, %rdx
; AVX-NEXT: vucomisd %xmm1, %xmm2
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX-NEXT: vsubsd %xmm1, %xmm2, %xmm4
; AVX-NEXT: vcvttsd2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm2, %rdx
; AVX-NEXT: vucomisd %xmm1, %xmm2
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttsd2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rdx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm4
; AVX-NEXT: vcvttsd2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rcx
; AVX-NEXT: vucomisd %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> %a to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32:
; SSE: # BB#0:
; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: movapd %xmm1, %xmm3
; SSE-NEXT: subsd %xmm2, %xmm3
; SSE-NEXT: cvttsd2si %xmm3, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm1, %rdx
; SSE-NEXT: ucomisd %xmm2, %xmm1
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: movapd %xmm1, %xmm4
; SSE-NEXT: subsd %xmm2, %xmm4
; SSE-NEXT: cvttsd2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm1, %rdx
; SSE-NEXT: ucomisd %xmm2, %xmm1
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE-NEXT: movapd %xmm0, %xmm3
; SSE-NEXT: subsd %xmm2, %xmm3
; SSE-NEXT: cvttsd2si %xmm3, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm0, %rdx
; SSE-NEXT: ucomisd %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: subsd %xmm2, %xmm4
; SSE-NEXT: cvttsd2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: ucomisd %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm1, %rax
; AVX-NEXT: vcvttsd2si %xmm0, %rcx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %cvt
}

;
; Float to Signed Integer
;
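; Signed f32-to-i32 has direct packed support (cvttps2dq / vcvttps2dq), so
; the i32 cases below should collapse to a single conversion; the i64 cases
; are still scalarized through cvttss2si as above.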

define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i32:
; SSE: # BB#0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptosi <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE: # BB#0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX: # BB#0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %rax, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;
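; Unsigned f32-to-i32 can be done per-lane with the 64-bit cvttss2si, since
; every u32 value is representable in the positive range of an i64; unsigned
; f32-to-i64 needs the same subtract/xor/cmovae sequence as the double cases.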

define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i32:
; AVX: # BB#0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; AVX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: subss %xmm2, %xmm1
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: subss %xmm2, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm2
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX-NEXT: retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE: # BB#0:
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: subss %xmm2, %xmm1
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rdx
; SSE-NEXT: movd %rdx, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: subss %xmm2, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: ucomiss %xmm2, %xmm0
; SSE-NEXT: cmovaeq %rax, %rcx
; SSE-NEXT: movd %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_2i64:
; AVX: # BB#0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm2, %xmm1, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm1, %rdx
; AVX-NEXT: vucomiss %xmm2, %xmm1
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vsubss %xmm2, %xmm0, %xmm1
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vucomiss %xmm2, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vmovq %rdx, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: shufpd {{.*#+}} xmm2 = xmm2[1,0]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1,0]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_8f32_to_8i32:
; AVX: # BB#0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: vcvttss2si %xmm1, %rcx
; AVX-NEXT: vmovd %ecx, %xmm2
; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm1, %rax
; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX-NEXT: vcvttss2si %xmm2, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vmovd %ecx, %xmm2
; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm2, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm2
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT: vcvttss2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm3, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm3
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT: vcvttss2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE: # BB#0:
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT: movaps %xmm3, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm3, %rdx
; SSE-NEXT: ucomiss %xmm1, %xmm3
; SSE-NEXT: cmovaeq %rcx, %rdx
; SSE-NEXT: movd %rdx, %xmm3
; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0]
; SSE-NEXT: movapd %xmm0, %xmm4
; SSE-NEXT: subss %xmm1, %xmm4
; SSE-NEXT: cvttss2si %xmm4, %rcx
; SSE-NEXT: xorq %rax, %rcx
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovaeq %rcx, %rax
; SSE-NEXT: movd %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_8f32_to_4i64:
; AVX: # BB#0:
; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm2, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm2
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm2
; AVX-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX-NEXT: vcvttss2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm3, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm3
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX-NEXT: vcvttss2si %xmm3, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rdx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rdx
; AVX-NEXT: vmovq %rdx, %xmm3
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm4
; AVX-NEXT: vcvttss2si %xmm4, %rax
; AVX-NEXT: xorq %rcx, %rax
; AVX-NEXT: vcvttss2si %xmm0, %rcx
; AVX-NEXT: vucomiss %xmm1, %xmm0
; AVX-NEXT: cmovaeq %rax, %rcx
; AVX-NEXT: vmovq %rcx, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;
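; Conversions of constant vectors should be folded at compile time, leaving
; just loads of the precomputed results (negative values appear as their
; unsigned two's-complement equivalents in the movaps asm comments).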

define <2 x i64> @fptosi_2f64_to_2i64_const() {
; SSE-LABEL: fptosi_2f64_to_2i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615]
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptosi_2f64_to_2i32_const() {
; SSE-LABEL: fptosi_2f64_to_2i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <4294967295,1,u,u>
; AVX-NEXT: retq
  %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptosi_4f64_to_4i64_const() {
; SSE-LABEL: fptosi_4f64_to_4i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613]
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptosi_4f64_to_4i32_const() {
; SSE-LABEL: fptosi_4f64_to_4i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f64_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <2 x i64> @fptoui_2f64_to_2i64_const() {
; SSE-LABEL: fptoui_2f64_to_2i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4]
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64>
  ret <2 x i64> %cvt
}

define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = <2,4,u,u>
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = <2,4,u,u>
; AVX-NEXT: retq
  %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32>
  %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %ext
}

define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) {
; SSE-LABEL: fptoui_4f64_to_4i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f64_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i32> @fptosi_4f32_to_4i32_const() {
; SSE-LABEL: fptosi_4f32_to_4i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptosi_4f32_to_4i64_const() {
; SSE-LABEL: fptosi_4f32_to_4i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_4f32_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3]
; AVX-NEXT: retq
  %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_8i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295]
; AVX-NEXT: retq
  %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32>
  ret <8 x i32> %cvt
}

define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6]
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32>
  ret <4 x i32> %cvt
}

define <4 x i64> @fptoui_4f32_to_4i64_const() {
; SSE-LABEL: fptoui_4f32_to_4i64_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_4f32_to_4i64_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8]
; AVX-NEXT: retq
  %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64>
  ret <4 x i64> %cvt
}

define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32_const:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6]
; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1]
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_8f32_to_8i32_const:
; AVX: # BB#0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1]
; AVX-NEXT: retq
  %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32>
  ret <8 x i32> %cvt
}