; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
;
; Codegen checks for vector and scalar integer <-> floating-point conversions,
; fptrunc/fpext and scalar bitcasts. The ALL prefix is passed to both llc
; invocations above; the other two prefixes are each passed to only one of
; them (-mcpu=knl and -mcpu=skx respectively).

; sitofp <16 x i32> -> <16 x float>; both runs expect a single vcvtdq2ps on zmm0.
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: sitof32:
; ALL: ## BB#0:
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %b = sitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; sitofp <8 x i64> -> <8 x double>. The -mcpu=skx run expects one packed
; vcvtqq2pd; the -mcpu=knl run expects each element to be extracted, converted
; with scalar vcvtsi2sdq and reassembled (vunpcklpd/vinsertf128/vinsertf64x4).
define <8 x double> @sltof864(<8 x i64> %a) {
; KNL-LABEL: sltof864:
; KNL: ## BB#0:
; KNL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sltof864:
; SKX: ## BB#0:
; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0
; SKX-NEXT: retq
  %b = sitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}
; sitofp <4 x i64> -> <4 x double>. The -mcpu=skx run expects one vcvtqq2pd on
; ymm0; the -mcpu=knl run expects per-element vcvtsi2sdq and a vinsertf128 join.
define <4 x double> @sltof464(<4 x i64> %a) {
; KNL-LABEL: sltof464:
; KNL: ## BB#0:
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: sltof464:
; SKX: ## BB#0:
; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0
; SKX-NEXT: retq
  %b = sitofp <4 x i64> %a to <4 x double>
  ret <4 x double> %b
}

; sitofp <2 x i64> -> <2 x float>. The -mcpu=skx run expects xmm0 to be treated
; as ymm0 (kill comment) followed by vcvtqq2ps; the -mcpu=knl run expects scalar
; vcvtsi2ssq conversions combined with vinsertps.
define <2 x float> @sltof2f32(<2 x i64> %a) {
; KNL-LABEL: sltof2f32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; KNL-NEXT: retq
;
; SKX-LABEL: sltof2f32:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0
; SKX-NEXT: retq
  %b = sitofp <2 x i64> %a to <2 x float>
  ret <2 x float>%b
}

; sitofp <4 x i64> -> <4 x float> with the source vector loaded from memory.
; The -mcpu=skx run expects the load folded into vcvtqq2psy (%rdi); the
; -mcpu=knl run expects a vmovdqu load followed by scalar vcvtsi2ssq conversions.
define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
; KNL-LABEL: sltof4f32_mem:
; KNL: ## BB#0:
; KNL-NEXT: vmovdqu (%rdi), %ymm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
;
; SKX-LABEL: sltof4f32_mem:
; SKX: ## BB#0:
; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0
; SKX-NEXT: retq
  %a1 = load <4 x i64>, <4 x i64>* %a, align 8
  %b = sitofp <4 x i64> %a1 to <4 x float>
  ret <4 x float>%b
}

; fptosi <4 x double> -> <4 x i64>. The -mcpu=skx run expects one vcvttpd2qq;
; the -mcpu=knl run expects per-element vcvttsd2si rebuilt with vpunpcklqdq and
; vinserti128.
define <4 x i64> @f64tosl(<4 x double> %a) {
; KNL-LABEL: f64tosl:
; KNL: ## BB#0:
; KNL-NEXT: vextractf128 $1, %ymm0, %xmm1
; KNL-NEXT: vcvttsd2si %xmm1, %rax
; KNL-NEXT: vmovq %rax, %xmm2
; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; KNL-NEXT: vcvttsd2si %xmm1, %rax
; KNL-NEXT: vmovq %rax, %xmm1
; KNL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; KNL-NEXT: vcvttsd2si %xmm0, %rax
; KNL-NEXT: vmovq %rax, %xmm2
; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; KNL-NEXT: vcvttsd2si %xmm0, %rax
; KNL-NEXT: vmovq %rax, %xmm0
; KNL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: f64tosl:
; SKX: ## BB#0:
; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0
; SKX-NEXT: retq
  %b = fptosi <4 x double> %a to <4 x i64>
  ret <4 x i64> %b
}

; fptosi <4 x float> -> <4 x i64>. The -mcpu=skx run expects one vcvttps2qq
; (xmm0 -> ymm0); the -mcpu=knl run expects per-element vcvttss2si.
define <4 x i64> @f32tosl(<4 x float> %a) {
; KNL-LABEL: f32tosl:
; KNL: ## BB#0:
; KNL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; KNL-NEXT: vcvttss2si %xmm1, %rax
; KNL-NEXT: vmovq %rax, %xmm1
; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; KNL-NEXT: vcvttss2si %xmm2, %rax
; KNL-NEXT: vmovq %rax, %xmm2
; KNL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; KNL-NEXT: vcvttss2si %xmm0, %rax
; KNL-NEXT: vmovq %rax, %xmm2
; KNL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; KNL-NEXT: vcvttss2si %xmm0, %rax
; KNL-NEXT: vmovq %rax, %xmm0
; KNL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: f32tosl:
; SKX: ## BB#0:
; SKX-NEXT: vcvttps2qq %xmm0, %ymm0
; SKX-NEXT: retq
  %b = fptosi <4 x float> %a to <4 x i64>
  ret <4 x i64> %b
}

; sitofp <4 x i64> -> <4 x float> with a register operand. The -mcpu=skx run
; expects vcvtqq2ps ymm0 -> xmm0; the -mcpu=knl run expects scalar vcvtsi2ssq
; conversions merged with vinsertps.
define <4 x float> @sltof432(<4 x i64> %a) {
; KNL-LABEL: sltof432:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
;
; SKX-LABEL: sltof432:
; SKX: ## BB#0:
; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0
; SKX-NEXT: retq
  %b = sitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

; uitofp <4 x i64> -> <4 x float>; unsigned counterpart of sltof432. The
; -mcpu=skx run expects vcvtuqq2ps; the -mcpu=knl run expects scalar
; vcvtusi2ssq conversions.
define <4 x float> @ultof432(<4 x i64> %a) {
; KNL-LABEL: ultof432:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtusi2ssq %rax, %xmm0, %xmm0
; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; KNL-NEXT: retq
;
; SKX-LABEL: ultof432:
; SKX: ## BB#0:
; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0
; SKX-NEXT: retq
  %b = uitofp <4 x i64> %a to <4 x float>
  ret <4 x float> %b
}

; uitofp <8 x i64> -> <8 x double>. The -mcpu=skx run expects one vcvtuqq2pd on
; zmm0; the -mcpu=knl run expects per-element vcvtusi2sdq and reassembly.
define <8 x double> @ultof64(<8 x i64> %a) {
; KNL-LABEL: ultof64:
; KNL: ## BB#0:
; KNL-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; KNL-NEXT: vpextrq $1, %xmm1, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vmovq %xmm1, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
; KNL-NEXT: vpextrq $1, %xmm2, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm2, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm2
; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm3
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: ultof64:
; SKX: ## BB#0:
; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0
; SKX-NEXT: retq
  %b = uitofp <8 x i64> %a to <8 x double>
  ret <8 x double> %b
}

; fptosi <16 x float> -> <16 x i32>; both runs expect a single vcvttps2dq.
define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
; ALL-LABEL: fptosi00:
; ALL: ## BB#0:
; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: retq
  %b = fptosi <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

; fptoui <16 x float> -> <16 x i32>; both runs expect a single vcvttps2udq.
define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
; ALL-LABEL: fptoui00:
; ALL: ## BB#0:
; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
; ALL-NEXT: retq
  %b = fptoui <16 x float> %a to <16 x i32>
  ret <16 x i32> %b
}

; fptoui <8 x float> -> <8 x i32>. The -mcpu=knl run expects the conversion to
; be done on zmm0 (bracketed by kill comments); the -mcpu=skx run expects it on
; ymm0 directly.
define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
; KNL-LABEL: fptoui_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_256:
; SKX: ## BB#0:
; SKX-NEXT: vcvttps2udq %ymm0, %ymm0
; SKX-NEXT: retq
  %b = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptoui <4 x float> -> <4 x i32>. The -mcpu=knl run expects the conversion on
; zmm0 (bracketed by kill comments); the -mcpu=skx run expects it on xmm0.
define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
; KNL-LABEL: fptoui_128:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_128:
; SKX: ## BB#0:
; SKX-NEXT: vcvttps2udq %xmm0, %xmm0
; SKX-NEXT: retq
  %b = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %b
}

; fptoui <8 x double> -> <8 x i32>; both runs expect vcvttpd2udq zmm0 -> ymm0.
define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
; ALL-LABEL: fptoui01:
; ALL: ## BB#0:
; ALL-NEXT: vcvttpd2udq %zmm0, %ymm0
; ALL-NEXT: retq
  %b = fptoui <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptoui <4 x double> -> <4 x i32>. The -mcpu=knl run expects the zmm form of
; vcvttpd2udq (bracketed by kill comments); the -mcpu=skx run expects the
; ymm0 -> xmm0 form.
define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
; KNL-LABEL: fptoui_256d:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttpd2udq %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_256d:
; SKX: ## BB#0:
; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0
; SKX-NEXT: retq
  %b = fptoui <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; sitofp <8 x i32> -> <8 x double>; both runs expect vcvtdq2pd ymm0 -> zmm0.
define <8 x double> @sitof64(<8 x i32> %a) {
; ALL-LABEL: sitof64:
; ALL: ## BB#0:
; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %b = sitofp <8 x i32> %a to <8 x double>
  ret <8 x double> %b
}

; fptosi <8 x double> -> <8 x i32>; both runs expect vcvttpd2dq zmm0 -> ymm0.
define <8 x i32> @fptosi01(<8 x double> %a) {
; ALL-LABEL: fptosi01:
; ALL: ## BB#0:
; ALL-NEXT: vcvttpd2dq %zmm0, %ymm0
; ALL-NEXT: retq
  %b = fptosi <8 x double> %a to <8 x i32>
  ret <8 x i32> %b
}

; fptosi <4 x double> -> <4 x i32>. The two runs expect the same ymm0 -> xmm0
; conversion but with different mnemonic spellings (vcvttpd2dqy vs vcvttpd2dq).
define <4 x i32> @fptosi03(<4 x double> %a) {
; KNL-LABEL: fptosi03:
; KNL: ## BB#0:
; KNL-NEXT: vcvttpd2dqy %ymm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fptosi03:
; SKX: ## BB#0:
; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0
; SKX-NEXT: retq
  %b = fptosi <4 x double> %a to <4 x i32>
  ret <4 x i32> %b
}

; fptrunc <16 x double> -> <16 x float>. Both runs expect two vcvtpd2ps
; (zmm -> ymm) whose results are joined; the join is vinsertf64x4 for
; -mcpu=knl and vinsertf32x8 for -mcpu=skx.
define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
; KNL-LABEL: fptrunc00:
; KNL: ## BB#0:
; KNL-NEXT: vcvtpd2ps %zmm0, %ymm0
; KNL-NEXT: vcvtpd2ps %zmm1, %ymm1
; KNL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fptrunc00:
; SKX: ## BB#0:
; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0
; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1
; SKX-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; SKX-NEXT: retq
  %a = fptrunc <16 x double> %b to <16 x float>
  ret <16 x float> %a
}

; fptrunc <4 x double> -> <4 x float>. The runs differ only in mnemonic
; spelling (vcvtpd2psy vs vcvtpd2ps).
define <4 x float> @fptrunc01(<4 x double> %b) {
; KNL-LABEL: fptrunc01:
; KNL: ## BB#0:
; KNL-NEXT: vcvtpd2psy %ymm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fptrunc01:
; SKX: ## BB#0:
; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0
; SKX-NEXT: retq
  %a = fptrunc <4 x double> %b to <4 x float>
  ret <4 x float> %a
}

; fptrunc <4 x double> -> <4 x float> whose result is selected against
; zeroinitializer by a <4 x i1> mask. The -mcpu=skx run expects the select to
; become a zero-masked vcvtpd2ps {%k1} {z}; the -mcpu=knl run expects an
; unmasked convert followed by vblendvps with a zeroed register.
define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
; KNL-LABEL: fptrunc02:
; KNL: ## BB#0:
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vcvtpd2psy %ymm0, %xmm0
; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: fptrunc02:
; SKX: ## BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; SKX-NEXT: retq
  %a = fptrunc <4 x double> %b to <4 x float>
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
  ret <4 x float> %c
}

; fpext <8 x float> -> <8 x double>; both runs expect vcvtps2pd ymm0 -> zmm0.
define <8 x double> @fpext00(<8 x float> %b) nounwind {
; ALL-LABEL: fpext00:
; ALL: ## BB#0:
; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %a = fpext <8 x float> %b to <8 x double>
  ret <8 x double> %a
}

; fpext <4 x float> -> <4 x double> selected against zeroinitializer by
; (fcmp ogt %a1, %b1). The -mcpu=skx run expects the compare to write %k1 and
; the convert to be zero-masked; the -mcpu=knl run expects a vector compare
; plus vblendvpd.
define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
; KNL-LABEL: fpext01:
; KNL: ## BB#0:
; KNL-NEXT: vcvtps2pd %xmm0, %ymm0
; KNL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1
; KNL-NEXT: vxorpd %ymm2, %ymm2, %ymm2
; KNL-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: fpext01:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1
; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; SKX-NEXT: retq
  %a = fpext <4 x float> %b to <4 x double>
  %mask = fcmp ogt <4 x double>%a1, %b1
  %c = select <4 x i1>%mask, <4 x double>%a, <4 x double>zeroinitializer
  ret <4 x double> %c
}

; Scalar sitofp i64 -> double with the operand loaded from memory; both runs
; expect the load folded into vcvtsi2sdq (%rdi).
define double @funcA(i64* nocapture %e) {
; ALL-LABEL: funcA:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to double
  ret double %conv
}

; Scalar sitofp i32 -> double with the operand loaded from memory; both runs
; expect the load folded into vcvtsi2sdl (%rdi).
define double @funcB(i32* %e) {
; ALL-LABEL: funcB:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to double
  ret double %conv
}

; Scalar sitofp i32 -> float with the operand loaded from memory; both runs
; expect the load folded into vcvtsi2ssl (%rdi).
define float @funcC(i32* %e) {
; ALL-LABEL: funcC:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
  %tmp1 = load i32, i32* %e, align 4
  %conv = sitofp i32 %tmp1 to float
  ret float %conv
}

; Scalar sitofp i64 -> float with the operand loaded from memory; both runs
; expect the load folded into vcvtsi2ssq (%rdi).
define float @i64tof32(i64* %e) {
; ALL-LABEL: i64tof32:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
  %tmp1 = load i64, i64* %e, align 8
  %conv = sitofp i64 %tmp1 to float
  ret float %conv
}

; Scalar fpext float -> double through two stack slots. The float slot %f is
; loaded without any prior store in this function; the checks only pin the
; vmovss / vcvtss2sd / vmovsd sequence.
define void @fpext() {
; ALL-LABEL: fpext:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; ALL-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load float, float* %f, align 4
  %conv = fpext float %tmp to double
  store double %conv, double* %d, align 8
  ret void
}

; Scalar fptrunc double -> float through two stack slots. The double slot %d
; is loaded without any prior store in this function; the checks only pin the
; vmovsd / vcvtsd2ss / vmovss sequence.
define void @fpround_scalar() nounwind uwtable {
; ALL-LABEL: fpround_scalar:
; ALL: ## BB#0: ## %entry
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; ALL-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT: retq
entry:
  %f = alloca float, align 4
  %d = alloca double, align 8
  %tmp = load double, double* %d, align 8
  %conv = fptrunc double %tmp to float
  store float %conv, float* %f, align 4
  ret void
}

; bitcast i64 -> double; both runs expect a single vmovq %rdi -> %xmm0.
define double @long_to_double(i64 %x) {
; ALL-LABEL: long_to_double:
; ALL: ## BB#0:
; ALL-NEXT: vmovq %rdi, %xmm0
; ALL-NEXT: retq
  %res = bitcast i64 %x to double
  ret double %res
}

; bitcast double -> i64; both runs expect a single vmovq %xmm0 -> %rax.
define i64 @double_to_long(double %x) {
; ALL-LABEL: double_to_long:
; ALL: ## BB#0:
; ALL-NEXT: vmovq %xmm0, %rax
; ALL-NEXT: retq
  %res = bitcast double %x to i64
  ret i64 %res
}

; bitcast i32 -> float; both runs expect a single vmovd %edi -> %xmm0.
define float @int_to_float(i32 %x) {
; ALL-LABEL: int_to_float:
; ALL: ## BB#0:
; ALL-NEXT: vmovd %edi, %xmm0
; ALL-NEXT: retq
  %res = bitcast i32 %x to float
  ret float %res
}

; bitcast float -> i32; both runs expect a single vmovd %xmm0 -> %eax.
define i32 @float_to_int(float %x) {
; ALL-LABEL: float_to_int:
; ALL: ## BB#0:
; ALL-NEXT: vmovd %xmm0, %eax
; ALL-NEXT: retq
  %res = bitcast float %x to i32
  ret i32 %res
}

; uitofp <16 x i32> -> <16 x double>; the result occupies zmm0 and zmm1. Both
; runs expect two vcvtudq2pd; they differ only in how the upper half is
; extracted (vextracti64x4 vs vextracti32x8).
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
; KNL-LABEL: uitof64:
; KNL: ## BB#0:
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm2
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm1
; KNL-NEXT: vmovaps %zmm2, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: uitof64:
; SKX: ## BB#0:
; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2
; SKX-NEXT: vextracti32x8 $1, %zmm0, %ymm0
; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1
; SKX-NEXT: vmovaps %zmm2, %zmm0
; SKX-NEXT: retq
  %b = uitofp <16 x i32> %a to <16 x double>
  ret <16 x double> %b
}

; uitofp <4 x i32> -> <4 x double>. The -mcpu=knl run expects the ymm0 -> zmm0
; form of vcvtudq2pd (bracketed by kill comments); the -mcpu=skx run expects
; the xmm0 -> ymm0 form.
define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
; KNL-LABEL: uitof64_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof64_256:
; SKX: ## BB#0:
; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0
; SKX-NEXT: retq
  %b = uitofp <4 x i32> %a to <4 x double>
  ret <4 x double> %b
}

; uitofp <16 x i32> -> <16 x float>; both runs expect a single vcvtudq2ps.
define <16 x float> @uitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: uitof32:
; ALL: ## BB#0:
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %b = uitofp <16 x i32> %a to <16 x float>
  ret <16 x float> %b
}

; uitofp <8 x i32> -> <8 x float>. The -mcpu=knl run expects the conversion on
; zmm0 (bracketed by kill comments); the -mcpu=skx run expects it on ymm0.
define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
; KNL-LABEL: uitof32_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof32_256:
; SKX: ## BB#0:
; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0
; SKX-NEXT: retq
  %b = uitofp <8 x i32> %a to <8 x float>
  ret <8 x float> %b
}

; uitofp <4 x i32> -> <4 x float>. The -mcpu=knl run expects the conversion on
; zmm0 (bracketed by kill comments); the -mcpu=skx run expects it on xmm0.
define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
; KNL-LABEL: uitof32_128:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof32_128:
; SKX: ## BB#0:
; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0
; SKX-NEXT: retq
  %b = uitofp <4 x i32> %a to <4 x float>
  ret <4 x float> %b
}

; Scalar fptosi float -> i32; both runs expect vcvttss2si into %eax.
define i32 @fptosi02(float %a) nounwind {
; ALL-LABEL: fptosi02:
; ALL: ## BB#0:
; ALL-NEXT: vcvttss2si %xmm0, %eax
; ALL-NEXT: retq
  %b = fptosi float %a to i32
  ret i32 %b
}

; Scalar fptoui float -> i32; both runs expect vcvttss2usi into %eax.
define i32 @fptoui02(float %a) nounwind {
; ALL-LABEL: fptoui02:
; ALL: ## BB#0:
; ALL-NEXT: vcvttss2usi %xmm0, %eax
; ALL-NEXT: retq
  %b = fptoui float %a to i32
  ret i32 %b
}

; Scalar uitofp i32 -> float; both runs expect vcvtusi2ssl from %edi.
define float @uitofp02(i32 %a) nounwind {
; ALL-LABEL: uitofp02:
; ALL: ## BB#0:
; ALL-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
; ALL-NEXT: retq
  %b = uitofp i32 %a to float
  ret float %b
}

; Scalar uitofp i32 -> double; both runs expect vcvtusi2sdl from %edi.
define double @uitofp03(i32 %a) nounwind {
; ALL-LABEL: uitofp03:
; ALL: ## BB#0:
; ALL-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
; ALL-NEXT: retq
  %b = uitofp i32 %a to double
  ret double %b
}

; sitofp of a <16 x i1> produced by (icmp slt %a, 0). Both runs expect the
; compare result to be expanded into a dword vector and then converted with
; vcvtdq2ps; the expansion is vpternlogd + zero-masked vmovdqa32 for
; -mcpu=knl and vpmovm2d for -mcpu=skx.
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
; KNL-LABEL: sitofp_16i1_float:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vcvtdq2ps %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sitofp_16i1_float:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0
; SKX-NEXT: retq
  %mask = icmp slt <16 x i32> %a, zeroinitializer
  %1 = sitofp <16 x i1> %mask to <16 x float>
  ret <16 x float> %1
}

; sitofp <16 x i8> -> <16 x float>; both runs expect sign extension with
; vpmovsxbd followed by vcvtdq2ps.
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
; ALL-LABEL: sitofp_16i8_float:
; ALL: ## BB#0:
; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %1 = sitofp <16 x i8> %a to <16 x float>
  ret <16 x float> %1
}

; sitofp <16 x i16> -> <16 x float>; both runs expect sign extension with
; vpmovsxwd followed by vcvtdq2ps.
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
; ALL-LABEL: sitofp_16i16_float:
; ALL: ## BB#0:
; ALL-NEXT: vpmovsxwd %ymm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %1 = sitofp <16 x i16> %a to <16 x float>
  ret <16 x float> %1
}

; sitofp <8 x i16> -> <8 x double>; both runs expect sign extension with
; vpmovsxwd followed by vcvtdq2pd.
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
; ALL-LABEL: sitofp_8i16_double:
; ALL: ## BB#0:
; ALL-NEXT: vpmovsxwd %xmm0, %ymm0
; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %1 = sitofp <8 x i16> %a to <8 x double>
  ret <8 x double> %1
}

; sitofp <8 x i8> -> <8 x double>. Both runs expect the input to be widened
; from 16-bit lanes with vpmovzxwd, sign-extended from bit 7 with a
; vpslld/vpsrad $24 pair, then converted with vcvtdq2pd.
; NOTE(review): the 16-bit source lanes presumably come from <8 x i8> being
; promoted to <8 x i16> for argument passing -- confirm.
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
; ALL-LABEL: sitofp_8i8_double:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT: vpslld $24, %ymm0, %ymm0
; ALL-NEXT: vpsrad $24, %ymm0, %ymm0
; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT: retq
  %1 = sitofp <8 x i8> %a to <8 x double>
  ret <8 x double> %1
}

; sitofp of a <8 x i1> produced by (fcmp ogt %a, 0.0). Both runs expect the
; compare result to be expanded to a dword vector in ymm0 and converted with
; vcvtdq2pd; the expansion is vpternlogd + zero-masked vmovdqa64 + vpmovqd for
; -mcpu=knl and vpmovm2d for -mcpu=skx.
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; KNL-LABEL: sitofp_8i1_double:
; KNL: ## BB#0:
; KNL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: sitofp_8i1_double:
; SKX: ## BB#0:
; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; SKX-NEXT: vpmovm2d %k0, %ymm0
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
; SKX-NEXT: retq
  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
  %1 = sitofp <8 x i1> %cmpres to <8 x double>
  ret <8 x double> %1
}

; uitofp <16 x i8> -> <16 x float>; both runs expect zero extension with
; vpmovzxbd followed by vcvtudq2ps.
define <16 x float> @uitofp_16i8(<16 x i8>%a) {
; ALL-LABEL: uitofp_16i8:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %b = uitofp <16 x i8> %a to <16 x float>
  ret <16 x float>%b
}

; uitofp <16 x i16> -> <16 x float>; both runs expect zero extension with
; vpmovzxwd followed by vcvtudq2ps.
define <16 x float> @uitofp_16i16(<16 x i16>%a) {
; ALL-LABEL: uitofp_16i16:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: retq
  %b = uitofp <16 x i16> %a to <16 x float>
  ret <16 x float>%b
}