1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 4 ; 5 ; 32-bit tests to make sure we're not doing anything stupid. 6 ; RUN: llc < %s -mtriple=i686-unknown-unknown 7 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse 8 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 9 10 ; 11 ; Signed Integer to Double 12 ; 13 14 define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { 15 ; SSE-LABEL: sitofp_2i64_to_2f64: 16 ; SSE: # BB#0: 17 ; SSE-NEXT: movd %xmm0, %rax 18 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1 19 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 20 ; SSE-NEXT: movd %xmm0, %rax 21 ; SSE-NEXT: xorps %xmm0, %xmm0 22 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 23 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 24 ; SSE-NEXT: movapd %xmm1, %xmm0 25 ; SSE-NEXT: retq 26 ; 27 ; AVX-LABEL: sitofp_2i64_to_2f64: 28 ; AVX: # BB#0: 29 ; AVX-NEXT: vpextrq $1, %xmm0, %rax 30 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 31 ; AVX-NEXT: vmovq %xmm0, %rax 32 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 33 ; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 34 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 35 ; AVX-NEXT: retq 36 %cvt = sitofp <2 x i64> %a to <2 x double> 37 ret <2 x double> %cvt 38 } 39 40 define <2 x double> @sitofp_2i32_to_2f64(<4 x i32> %a) { 41 ; SSE-LABEL: sitofp_2i32_to_2f64: 42 ; SSE: # BB#0: 43 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 44 ; SSE-NEXT: retq 45 ; 46 ; AVX-LABEL: sitofp_2i32_to_2f64: 47 ; AVX: # BB#0: 48 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 49 ; AVX-NEXT: retq 50 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 51 %cvt = sitofp <2 x i32> %shuf to <2 x double> 52 ret <2 x double> %cvt 53 } 54 55 define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) { 56 ; SSE-LABEL: sitofp_4i32_to_2f64: 57 ; SSE: # BB#0: 58 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 59 ; SSE-NEXT: retq 60 ; 61 ; AVX-LABEL: sitofp_4i32_to_2f64: 62 ; AVX: # BB#0: 63 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 64 ; AVX-NEXT: vzeroupper 65 ; AVX-NEXT: retq 66 %cvt = sitofp <4 x i32> %a to <4 x double> 67 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> 68 ret <2 x double> %shuf 69 } 70 71 define <2 x double> @sitofp_2i16_to_2f64(<8 x i16> %a) { 72 ; SSE-LABEL: sitofp_2i16_to_2f64: 73 ; SSE: # BB#0: 74 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 75 ; SSE-NEXT: psrad $16, %xmm0 76 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 77 ; SSE-NEXT: retq 78 ; 79 ; AVX-LABEL: sitofp_2i16_to_2f64: 80 ; AVX: # BB#0: 81 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 82 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 83 ; AVX-NEXT: retq 84 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 85 %cvt = sitofp <2 x i16> %shuf to <2 x double> 86 ret <2 x double> %cvt 87 } 88 89 define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) { 90 ; SSE-LABEL: sitofp_8i16_to_2f64: 91 ; SSE: # BB#0: 92 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 93 ; SSE-NEXT: psrad $16, %xmm0 94 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 95 ; SSE-NEXT: retq 96 ; 97 ; AVX1-LABEL: sitofp_8i16_to_2f64: 98 ; AVX1: # BB#0: 99 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 100 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 101 ; AVX1-NEXT: vzeroupper 102 ; AVX1-NEXT: retq 103 ; 104 ; AVX2-LABEL: sitofp_8i16_to_2f64: 105 ; AVX2: # BB#0: 106 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 107 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 108 ; AVX2-NEXT: vzeroupper 109 ; AVX2-NEXT: retq 110 %cvt = sitofp <8 x i16> %a to <8 x double> 111 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> 112 ret <2 x double> %shuf 113 } 114 115 define <2 x double> @sitofp_2i8_to_2f64(<16 x i8> %a) { 116 ; SSE-LABEL: sitofp_2i8_to_2f64: 117 ; SSE: # BB#0: 118 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 119 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 120 ; SSE-NEXT: psrad $24, %xmm0 121 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 122 ; SSE-NEXT: retq 123 ; 124 ; AVX-LABEL: sitofp_2i8_to_2f64: 125 ; AVX: # BB#0: 126 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 127 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 128 ; AVX-NEXT: retq 129 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 130 %cvt = sitofp <2 x i8> %shuf to <2 x double> 131 ret <2 x double> %cvt 132 } 133 134 define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) { 135 ; SSE-LABEL: sitofp_16i8_to_2f64: 136 ; SSE: # BB#0: 137 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 138 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 139 ; SSE-NEXT: psrad $24, %xmm0 140 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 141 ; SSE-NEXT: retq 142 ; 143 ; AVX1-LABEL: sitofp_16i8_to_2f64: 144 ; AVX1: # BB#0: 145 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 146 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 147 ; AVX1-NEXT: vzeroupper 148 ; AVX1-NEXT: retq 149 ; 150 ; AVX2-LABEL: sitofp_16i8_to_2f64: 151 ; AVX2: # BB#0: 152 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 153 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 154 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 155 ; AVX2-NEXT: vzeroupper 156 ; AVX2-NEXT: retq 157 %cvt = sitofp <16 x i8> %a to <16 x double> 158 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> 159 ret <2 x double> %shuf 160 } 161 162 define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { 163 ; SSE-LABEL: sitofp_4i64_to_4f64: 164 ; SSE: # BB#0: 165 ; SSE-NEXT: movd %xmm0, %rax 166 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2 167 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 168 ; SSE-NEXT: movd %xmm0, %rax 169 ; SSE-NEXT: xorps %xmm0, %xmm0 170 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 171 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] 172 ; SSE-NEXT: movd %xmm1, %rax 173 ; SSE-NEXT: cvtsi2sdq %rax, %xmm3 174 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 175 ; SSE-NEXT: movd %xmm0, %rax 176 ; SSE-NEXT: xorps %xmm0, %xmm0 177 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 178 ; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] 179 ; SSE-NEXT: movapd %xmm2, %xmm0 180 ; SSE-NEXT: movapd %xmm3, %xmm1 181 ; SSE-NEXT: retq 182 ; 183 ; AVX1-LABEL: sitofp_4i64_to_4f64: 184 ; AVX1: # BB#0: 185 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 186 ; AVX1-NEXT: vpextrq $1, %xmm1, %rax 187 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 188 ; AVX1-NEXT: vmovq %xmm1, %rax 189 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 190 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 191 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax 192 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 193 ; AVX1-NEXT: vmovq %xmm0, %rax 194 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 195 ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 196 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 197 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 198 ; AVX1-NEXT: retq 199 ; 200 ; AVX2-LABEL: sitofp_4i64_to_4f64: 201 ; AVX2: # BB#0: 202 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 203 ; AVX2-NEXT: vpextrq $1, %xmm1, %rax 204 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 205 ; AVX2-NEXT: vmovq %xmm1, %rax 206 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 207 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 208 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax 209 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm2 210 ; AVX2-NEXT: vmovq %xmm0, %rax 211 ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 212 ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm0 213 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 214 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 215 ; AVX2-NEXT: retq 216 %cvt = sitofp <4 x i64> %a to <4 x double> 217 ret <4 x double> %cvt 218 } 219 220 define <4 x double> @sitofp_4i32_to_4f64(<4 x i32> %a) { 221 ; SSE-LABEL: sitofp_4i32_to_4f64: 222 ; SSE: # BB#0: 223 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 224 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 225 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 226 ; SSE-NEXT: movaps %xmm2, %xmm0 227 ; SSE-NEXT: retq 228 ; 229 ; AVX-LABEL: sitofp_4i32_to_4f64: 230 ; AVX: # BB#0: 231 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 232 ; AVX-NEXT: retq 233 %cvt = sitofp <4 x i32> %a to <4 x double> 234 ret <4 x double> %cvt 235 } 236 237 define <4 x double> @sitofp_4i16_to_4f64(<8 x i16> %a) { 238 ; SSE-LABEL: sitofp_4i16_to_4f64: 239 ; SSE: # BB#0: 240 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 241 ; SSE-NEXT: psrad $16, %xmm1 242 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 243 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 244 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 245 ; SSE-NEXT: retq 246 ; 247 ; AVX-LABEL: sitofp_4i16_to_4f64: 248 ; AVX: # BB#0: 249 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 250 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 251 ; AVX-NEXT: retq 252 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 253 %cvt = sitofp <4 x i16> %shuf to <4 x double> 254 ret <4 x double> %cvt 255 } 256 257 define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) { 258 ; SSE-LABEL: sitofp_8i16_to_4f64: 259 ; SSE: # BB#0: 260 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 261 ; SSE-NEXT: psrad $16, %xmm1 262 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 263 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 264 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 265 ; SSE-NEXT: retq 266 ; 267 ; AVX1-LABEL: sitofp_8i16_to_4f64: 268 ; AVX1: # BB#0: 269 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 270 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 271 ; AVX1-NEXT: retq 272 ; 273 ; AVX2-LABEL: sitofp_8i16_to_4f64: 274 ; AVX2: # BB#0: 275 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 276 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 277 ; AVX2-NEXT: retq 278 %cvt = sitofp <8 x i16> %a to <8 x double> 279 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 280 ret <4 x double> %shuf 281 } 282 283 define <4 x double> @sitofp_4i8_to_4f64(<16 x i8> %a) { 284 ; SSE-LABEL: sitofp_4i8_to_4f64: 285 ; SSE: # BB#0: 286 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 287 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 288 ; SSE-NEXT: psrad $24, %xmm1 289 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 290 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 291 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 292 ; SSE-NEXT: retq 293 ; 294 ; AVX-LABEL: sitofp_4i8_to_4f64: 295 ; AVX: # BB#0: 296 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 297 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 298 ; AVX-NEXT: retq 299 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 300 %cvt = sitofp <4 x i8> %shuf to <4 x double> 301 ret <4 x double> %cvt 302 } 303 304 define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) { 305 ; SSE-LABEL: sitofp_16i8_to_4f64: 306 ; SSE: # BB#0: 307 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 308 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 309 ; SSE-NEXT: psrad $24, %xmm1 310 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm0 311 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 312 ; SSE-NEXT: cvtdq2pd %xmm1, %xmm1 313 ; SSE-NEXT: retq 314 ; 315 ; AVX1-LABEL: sitofp_16i8_to_4f64: 316 ; AVX1: # BB#0: 317 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 318 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 319 ; AVX1-NEXT: retq 320 ; 321 ; AVX2-LABEL: sitofp_16i8_to_4f64: 322 ; AVX2: # BB#0: 323 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 324 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 325 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 326 ; AVX2-NEXT: retq 327 %cvt = sitofp <16 x i8> %a to <16 x double> 328 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 329 ret <4 x double> %shuf 330 } 331 332 ; 333 ; Unsigned Integer to Double 334 ; 335 336 define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { 337 ; SSE-LABEL: uitofp_2i64_to_2f64: 338 ; SSE: # BB#0: 339 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] 340 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 341 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 342 ; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] 343 ; SSE-NEXT: subpd %xmm3, %xmm0 344 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] 345 ; SSE-NEXT: addpd %xmm4, %xmm0 346 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 347 ; SSE-NEXT: subpd %xmm3, %xmm2 348 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] 349 ; SSE-NEXT: addpd %xmm2, %xmm1 350 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 351 ; SSE-NEXT: retq 352 ; 353 ; AVX-LABEL: uitofp_2i64_to_2f64: 354 ; AVX: # BB#0: 355 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] 356 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 357 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] 358 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2 359 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2 360 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 361 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 362 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0 363 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 364 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] 365 ; AVX-NEXT: retq 366 %cvt = uitofp <2 x i64> %a to <2 x double> 367 ret <2 x double> %cvt 368 } 369 370 define <2 x double> @uitofp_2i32_to_2f64(<4 x i32> %a) { 371 ; SSE-LABEL: uitofp_2i32_to_2f64: 372 ; SSE: # BB#0: 373 ; SSE-NEXT: pxor %xmm1, %xmm1 374 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 375 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] 376 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 377 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 378 ; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] 379 ; SSE-NEXT: subpd %xmm3, %xmm0 380 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] 381 ; SSE-NEXT: addpd %xmm4, %xmm0 382 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 383 ; SSE-NEXT: subpd %xmm3, %xmm2 384 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] 385 ; SSE-NEXT: addpd %xmm2, %xmm1 386 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 387 ; SSE-NEXT: retq 388 ; 389 ; AVX-LABEL: uitofp_2i32_to_2f64: 390 ; AVX: # BB#0: 391 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 392 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] 393 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 394 ; AVX-NEXT: vmovapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] 395 ; AVX-NEXT: vsubpd %xmm3, %xmm2, %xmm2 396 ; AVX-NEXT: vhaddpd %xmm2, %xmm2, %xmm2 397 ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 398 ; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 399 ; AVX-NEXT: vsubpd %xmm3, %xmm0, %xmm0 400 ; AVX-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 401 ; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0] 402 ; AVX-NEXT: retq 403 %shuf = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 404 %cvt = uitofp <2 x i32> %shuf to <2 x double> 405 ret <2 x double> %cvt 406 } 407 408 define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { 409 ; SSE-LABEL: uitofp_4i32_to_2f64: 410 ; SSE: # BB#0: 411 ; SSE-NEXT: pxor %xmm1, %xmm1 412 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 413 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1127219200,1160773632,0,0] 414 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 415 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 416 ; SSE-NEXT: movapd {{.*#+}} xmm3 = [4.503600e+15,1.934281e+25] 417 ; SSE-NEXT: subpd %xmm3, %xmm0 418 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] 419 ; SSE-NEXT: addpd %xmm4, %xmm0 420 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 421 ; SSE-NEXT: subpd %xmm3, %xmm2 422 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] 423 ; SSE-NEXT: addpd %xmm2, %xmm1 424 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 425 ; SSE-NEXT: retq 426 ; 427 ; AVX1-LABEL: uitofp_4i32_to_2f64: 428 ; AVX1: # BB#0: 429 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 430 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 431 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 432 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 433 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 434 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 435 ; AVX1-NEXT: vzeroupper 436 ; AVX1-NEXT: retq 437 ; 438 ; AVX2-LABEL: uitofp_4i32_to_2f64: 439 ; AVX2: # BB#0: 440 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 441 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 442 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 443 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 444 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 445 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 446 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 447 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 448 ; AVX2-NEXT: vzeroupper 449 ; AVX2-NEXT: retq 450 %cvt = uitofp <4 x i32> %a to <4 x double> 451 %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> 452 ret <2 x double> %shuf 453 } 454 455 define <2 x double> @uitofp_2i16_to_2f64(<8 x i16> %a) { 456 ; SSE-LABEL: uitofp_2i16_to_2f64: 457 ; SSE: # BB#0: 458 ; SSE-NEXT: pxor %xmm1, %xmm1 459 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 460 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 461 ; SSE-NEXT: retq 462 ; 463 ; AVX-LABEL: uitofp_2i16_to_2f64: 464 ; AVX: # BB#0: 465 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 466 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 467 ; AVX-NEXT: retq 468 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 469 %cvt = uitofp <2 x i16> %shuf to <2 x double> 470 ret <2 x double> %cvt 471 } 472 473 define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) { 474 ; SSE-LABEL: uitofp_8i16_to_2f64: 475 ; SSE: # BB#0: 476 ; SSE-NEXT: pxor %xmm1, %xmm1 477 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 478 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 479 ; SSE-NEXT: retq 480 ; 481 ; AVX1-LABEL: uitofp_8i16_to_2f64: 482 ; AVX1: # BB#0: 483 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 484 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 485 ; AVX1-NEXT: vzeroupper 486 ; AVX1-NEXT: retq 487 ; 488 ; AVX2-LABEL: uitofp_8i16_to_2f64: 489 ; AVX2: # BB#0: 490 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 491 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 492 ; AVX2-NEXT: vzeroupper 493 ; AVX2-NEXT: retq 494 %cvt = uitofp <8 x i16> %a to <8 x double> 495 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <2 x i32> <i32 0, i32 1> 496 ret <2 x double> %shuf 497 } 498 499 define <2 x double> @uitofp_2i8_to_2f64(<16 x i8> %a) { 500 ; SSE-LABEL: uitofp_2i8_to_2f64: 501 ; SSE: # BB#0: 502 ; SSE-NEXT: pxor %xmm1, %xmm1 503 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 504 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 505 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 506 ; SSE-NEXT: retq 507 ; 508 ; AVX-LABEL: uitofp_2i8_to_2f64: 509 ; AVX: # BB#0: 510 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 511 ; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 512 ; AVX-NEXT: retq 513 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 514 %cvt = uitofp <2 x i8> %shuf to <2 x double> 515 ret <2 x double> %cvt 516 } 517 518 define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) { 519 ; SSE-LABEL: uitofp_16i8_to_2f64: 520 ; SSE: # BB#0: 521 ; SSE-NEXT: pxor %xmm1, %xmm1 522 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 523 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 524 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 525 ; SSE-NEXT: retq 526 ; 527 ; AVX1-LABEL: uitofp_16i8_to_2f64: 528 ; AVX1: # BB#0: 529 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 530 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 531 ; AVX1-NEXT: vzeroupper 532 ; AVX1-NEXT: retq 533 ; 534 ; AVX2-LABEL: uitofp_16i8_to_2f64: 535 ; AVX2: # BB#0: 536 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 537 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 538 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 539 ; AVX2-NEXT: vzeroupper 540 ; AVX2-NEXT: retq 541 %cvt = uitofp <16 x i8> %a to <16 x double> 542 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <2 x i32> <i32 0, i32 1> 543 ret <2 x double> %shuf 544 } 545 546 define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { 547 ; SSE-LABEL: uitofp_4i64_to_4f64: 548 ; SSE: # BB#0: 549 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] 550 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] 551 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 552 ; SSE-NEXT: movapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] 553 ; SSE-NEXT: subpd %xmm4, %xmm0 554 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,0,1] 555 ; SSE-NEXT: addpd %xmm5, %xmm0 556 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 557 ; SSE-NEXT: subpd %xmm4, %xmm3 558 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1] 559 ; SSE-NEXT: addpd %xmm3, %xmm5 560 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] 561 ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] 562 ; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 563 ; SSE-NEXT: subpd %xmm4, %xmm1 564 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[2,3,0,1] 565 ; SSE-NEXT: addpd %xmm5, %xmm1 566 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] 567 ; SSE-NEXT: subpd %xmm4, %xmm3 568 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,0,1] 569 ; SSE-NEXT: addpd %xmm3, %xmm2 570 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 571 ; SSE-NEXT: retq 572 ; 573 ; AVX1-LABEL: uitofp_4i64_to_4f64: 574 ; AVX1: # BB#0: 575 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 576 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] 577 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 578 ; AVX1-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] 579 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3 580 ; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 581 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 582 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 583 ; AVX1-NEXT: vsubpd %xmm4, %xmm1, %xmm1 584 ; AVX1-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 585 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0] 586 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 587 ; AVX1-NEXT: vsubpd %xmm4, %xmm3, %xmm3 588 ; AVX1-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 589 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 590 ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 591 ; AVX1-NEXT: vsubpd %xmm4, %xmm0, %xmm0 592 ; AVX1-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 593 ; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] 594 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 595 ; AVX1-NEXT: retq 596 ; 597 ; AVX2-LABEL: uitofp_4i64_to_4f64: 598 ; AVX2: # BB#0: 599 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 600 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [1127219200,1160773632,0,0] 601 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 602 ; AVX2-NEXT: vmovapd {{.*#+}} xmm4 = [4.503600e+15,1.934281e+25] 603 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3 604 ; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 605 ; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 606 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 607 ; AVX2-NEXT: vsubpd %xmm4, %xmm1, %xmm1 608 ; AVX2-NEXT: vhaddpd %xmm1, %xmm1, %xmm1 609 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm1[0] 610 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 611 ; AVX2-NEXT: vsubpd %xmm4, %xmm3, %xmm3 612 ; AVX2-NEXT: vhaddpd %xmm3, %xmm3, %xmm3 613 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 614 ; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 615 ; AVX2-NEXT: vsubpd %xmm4, %xmm0, %xmm0 616 ; AVX2-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 617 ; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm3[0],xmm0[0] 618 ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 619 ; AVX2-NEXT: retq 620 %cvt = uitofp <4 x i64> %a to <4 x double> 621 ret <4 x double> %cvt 622 } 623 624 define <4 x double> @uitofp_4i32_to_4f64(<4 x i32> %a) { 625 ; SSE-LABEL: uitofp_4i32_to_4f64: 626 ; SSE: # BB#0: 627 ; SSE-NEXT: movdqa %xmm0, %xmm2 628 ; SSE-NEXT: pxor %xmm1, %xmm1 629 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 630 ; SSE-NEXT: movdqa {{.*#+}} xmm3 = [1127219200,1160773632,0,0] 631 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[2,3,0,1] 632 ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 633 ; SSE-NEXT: movapd {{.*#+}} xmm5 = [4.503600e+15,1.934281e+25] 634 ; SSE-NEXT: subpd %xmm5, %xmm0 635 ; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,0,1] 636 ; SSE-NEXT: addpd %xmm6, %xmm0 637 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] 638 ; SSE-NEXT: subpd %xmm5, %xmm4 639 ; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[2,3,0,1] 640 ; SSE-NEXT: addpd %xmm4, %xmm6 641 ; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm6[0] 642 ; SSE-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3] 643 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[2,3,0,1] 644 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 645 ; SSE-NEXT: subpd %xmm5, %xmm2 646 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,3,0,1] 647 ; SSE-NEXT: addpd %xmm2, %xmm1 648 ; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] 649 ; SSE-NEXT: subpd %xmm5, %xmm4 650 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1] 651 ; SSE-NEXT: addpd %xmm4, %xmm2 652 ; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 653 ; SSE-NEXT: retq 654 ; 655 ; AVX1-LABEL: uitofp_4i32_to_4f64: 656 ; AVX1: # BB#0: 657 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 658 ; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 659 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 660 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 661 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 662 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 663 ; AVX1-NEXT: retq 664 ; 665 ; AVX2-LABEL: uitofp_4i32_to_4f64: 666 ; AVX2: # BB#0: 667 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 668 ; AVX2-NEXT: vcvtdq2pd %xmm1, %ymm1 669 ; AVX2-NEXT: vbroadcastsd {{.*}}(%rip), %ymm2 670 ; AVX2-NEXT: vmulpd %ymm2, %ymm1, %ymm1 671 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 672 ; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 673 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 674 ; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 675 ; AVX2-NEXT: retq 676 %cvt = uitofp <4 x i32> %a to <4 x double> 677 ret <4 x double> %cvt 678 } 679 680 define <4 x double> @uitofp_4i16_to_4f64(<8 x i16> %a) { 681 ; SSE-LABEL: uitofp_4i16_to_4f64: 682 ; SSE: # BB#0: 683 ; SSE-NEXT: pxor %xmm1, %xmm1 684 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 685 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 686 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 687 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 688 ; SSE-NEXT: movaps %xmm2, %xmm0 689 ; SSE-NEXT: retq 690 ; 691 ; AVX-LABEL: uitofp_4i16_to_4f64: 692 ; AVX: # BB#0: 693 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 694 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 695 ; AVX-NEXT: retq 696 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 697 %cvt = uitofp <4 x i16> %shuf to <4 x double> 698 ret <4 x double> %cvt 699 } 700 701 define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) { 702 ; SSE-LABEL: uitofp_8i16_to_4f64: 703 ; SSE: # BB#0: 704 ; SSE-NEXT: pxor %xmm1, %xmm1 705 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 706 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 707 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 708 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 709 ; SSE-NEXT: movaps %xmm2, %xmm0 710 ; SSE-NEXT: retq 711 ; 712 ; AVX1-LABEL: uitofp_8i16_to_4f64: 713 ; AVX1: # BB#0: 714 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 715 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 716 ; AVX1-NEXT: retq 717 ; 718 ; AVX2-LABEL: uitofp_8i16_to_4f64: 719 ; AVX2: # BB#0: 720 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 721 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 722 ; AVX2-NEXT: retq 723 %cvt = uitofp <8 x i16> %a to <8 x double> 724 %shuf = shufflevector <8 x double> %cvt, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 725 ret <4 x double> %shuf 726 } 727 728 define <4 x double> @uitofp_4i8_to_4f64(<16 x i8> %a) { 729 ; SSE-LABEL: uitofp_4i8_to_4f64: 730 ; SSE: # BB#0: 731 ; SSE-NEXT: pxor %xmm1, %xmm1 732 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 733 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 734 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 735 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 736 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 737 ; SSE-NEXT: movaps %xmm2, %xmm0 738 ; SSE-NEXT: retq 739 ; 740 ; AVX-LABEL: uitofp_4i8_to_4f64: 741 ; AVX: # BB#0: 742 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 743 ; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 744 ; AVX-NEXT: retq 745 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 746 %cvt = uitofp <4 x i8> %shuf to <4 x double> 747 ret <4 x double> %cvt 748 } 749 750 define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) { 751 ; SSE-LABEL: uitofp_16i8_to_4f64: 752 ; SSE: # BB#0: 753 ; SSE-NEXT: pxor %xmm1, %xmm1 754 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 755 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 756 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm2 757 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 758 ; SSE-NEXT: cvtdq2pd %xmm0, %xmm1 759 ; SSE-NEXT: movaps %xmm2, %xmm0 760 ; SSE-NEXT: retq 761 ; 762 ; AVX1-LABEL: uitofp_16i8_to_4f64: 763 ; AVX1: # BB#0: 764 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 765 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 766 ; AVX1-NEXT: retq 767 ; 768 ; AVX2-LABEL: uitofp_16i8_to_4f64: 769 ; AVX2: # BB#0: 770 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 771 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 772 ; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0 773 ; AVX2-NEXT: retq 774 %cvt = uitofp <16 x i8> %a to <16 x double> 775 %shuf = shufflevector <16 x double> %cvt, <16 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 776 ret <4 x double> %shuf 777 } 778 779 ; 780 ; Signed Integer to Float 781 ; 782 783 define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { 784 ; SSE-LABEL: sitofp_2i64_to_4f32: 785 ; SSE: # BB#0: 786 ; SSE-NEXT: movd %xmm0, %rax 787 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 788 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 789 ; SSE-NEXT: movd %xmm0, %rax 790 ; SSE-NEXT: xorps %xmm0, %xmm0 791 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 792 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 793 ; SSE-NEXT: movaps %xmm1, %xmm0 794 ; SSE-NEXT: retq 795 ; 796 ; AVX-LABEL: sitofp_2i64_to_4f32: 797 ; AVX: # BB#0: 798 ; AVX-NEXT: vpextrq $1, %xmm0, %rax 799 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 800 ; AVX-NEXT: vmovq %xmm0, %rax 801 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 802 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 803 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 804 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 805 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 806 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 807 ; AVX-NEXT: retq 808 %cvt = sitofp <2 x i64> %a to <2 x float> 809 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 810 ret <4 x float> %ext 811 } 812 813 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { 814 ; SSE-LABEL: sitofp_4i64_to_4f32_undef: 815 ; SSE: # BB#0: 816 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 817 ; SSE-NEXT: movd %xmm0, %rax 818 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 819 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 820 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 821 ; SSE-NEXT: movd %xmm0, %rax 822 ; SSE-NEXT: xorps %xmm0, %xmm0 823 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 824 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 825 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 826 ; SSE-NEXT: movaps %xmm1, %xmm0 827 ; SSE-NEXT: retq 828 ; 829 ; AVX-LABEL: sitofp_4i64_to_4f32_undef: 830 ; AVX: # BB#0: 831 ; AVX-NEXT: vpextrq $1, %xmm0, %rax 832 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 833 ; AVX-NEXT: vmovq %xmm0, %rax 834 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 835 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 836 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 837 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 838 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 839 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 840 ; AVX-NEXT: retq 841 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 842 %cvt = sitofp <4 x i64> %ext to <4 x float> 843 ret <4 x float> %cvt 844 } 845 846 define <4 x float> @sitofp_4i32_to_4f32(<4 x i32> %a) { 847 ; SSE-LABEL: sitofp_4i32_to_4f32: 848 ; SSE: # BB#0: 849 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 850 ; SSE-NEXT: retq 851 ; 852 ; AVX-LABEL: sitofp_4i32_to_4f32: 853 ; AVX: # BB#0: 854 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 855 ; AVX-NEXT: retq 856 %cvt = sitofp <4 x i32> %a to <4 x float> 857 ret <4 x float> %cvt 858 } 859 860 define <4 x float> @sitofp_4i16_to_4f32(<8 x i16> %a) { 861 ; SSE-LABEL: sitofp_4i16_to_4f32: 862 ; SSE: # BB#0: 863 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 864 ; SSE-NEXT: psrad $16, %xmm0 865 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 866 ; SSE-NEXT: retq 867 ; 868 ; AVX-LABEL: sitofp_4i16_to_4f32: 869 ; AVX: # BB#0: 870 ; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 871 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 872 ; AVX-NEXT: retq 873 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 874 %cvt = sitofp <4 x i16> %shuf to <4 x float> 875 ret <4 x float> %cvt 876 } 877 878 define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) { 879 ; SSE-LABEL: sitofp_8i16_to_4f32: 880 ; SSE: # BB#0: 881 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 882 ; SSE-NEXT: psrad $16, %xmm0 883 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 884 ; SSE-NEXT: retq 885 ; 886 ; AVX1-LABEL: sitofp_8i16_to_4f32: 887 ; AVX1: # BB#0: 888 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 889 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 890 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 891 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 892 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 893 ; AVX1-NEXT: vzeroupper 894 ; AVX1-NEXT: retq 895 ; 896 ; AVX2-LABEL: sitofp_8i16_to_4f32: 897 ; AVX2: # BB#0: 898 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 899 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 900 ; AVX2-NEXT: vzeroupper 901 ; AVX2-NEXT: retq 902 %cvt = sitofp <8 x i16> %a to <8 x float> 903 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 904 ret <4 x float> %shuf 905 } 906 907 define <4 x float> @sitofp_4i8_to_4f32(<16 x i8> %a) { 908 ; SSE-LABEL: sitofp_4i8_to_4f32: 909 ; SSE: # BB#0: 910 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 911 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 912 ; SSE-NEXT: psrad $24, %xmm0 913 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 914 ; SSE-NEXT: retq 915 ; 916 ; AVX-LABEL: sitofp_4i8_to_4f32: 917 ; AVX: # BB#0: 918 ; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 919 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 920 ; AVX-NEXT: retq 921 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 922 %cvt = sitofp <4 x i8> %shuf to <4 x float> 923 ret <4 x float> %cvt 924 } 925 926 define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) { 927 ; SSE-LABEL: sitofp_16i8_to_4f32: 928 ; SSE: # BB#0: 929 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 930 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 931 ; SSE-NEXT: psrad $24, %xmm0 932 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 933 ; SSE-NEXT: retq 934 ; 935 ; AVX1-LABEL: sitofp_16i8_to_4f32: 936 ; AVX1: # BB#0: 937 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 938 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 939 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 940 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 941 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 942 ; AVX1-NEXT: vzeroupper 943 ; AVX1-NEXT: retq 944 ; 945 ; AVX2-LABEL: sitofp_16i8_to_4f32: 946 ; AVX2: # BB#0: 947 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 948 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 949 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 950 ; AVX2-NEXT: vzeroupper 951 ; AVX2-NEXT: retq 952 %cvt = sitofp <16 x i8> %a to <16 x float> 953 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 954 ret <4 x float> %shuf 955 } 956 957 define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { 958 ; SSE-LABEL: sitofp_4i64_to_4f32: 959 ; SSE: # BB#0: 960 ; SSE-NEXT: movd %xmm1, %rax 961 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 962 ; SSE-NEXT: movd %xmm0, %rax 963 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 964 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 965 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 966 ; SSE-NEXT: movd %xmm1, %rax 967 ; SSE-NEXT: xorps %xmm1, %xmm1 968 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 969 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 970 ; SSE-NEXT: movd %xmm0, %rax 971 ; SSE-NEXT: xorps %xmm0, %xmm0 972 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 973 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 974 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 975 ; SSE-NEXT: movaps %xmm2, %xmm0 976 ; SSE-NEXT: retq 977 ; 978 ; AVX1-LABEL: sitofp_4i64_to_4f32: 979 ; AVX1: # BB#0: 980 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax 981 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 982 ; AVX1-NEXT: vmovq %xmm0, %rax 983 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 984 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 985 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 986 ; AVX1-NEXT: vmovq %xmm0, %rax 987 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 988 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 989 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax 990 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 991 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 992 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 993 ; AVX1-NEXT: vzeroupper 994 ; AVX1-NEXT: retq 995 ; 996 ; AVX2-LABEL: sitofp_4i64_to_4f32: 997 ; AVX2: # BB#0: 998 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax 999 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1000 ; AVX2-NEXT: vmovq %xmm0, %rax 1001 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1002 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1003 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1004 ; AVX2-NEXT: vmovq %xmm0, %rax 1005 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1006 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1007 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1008 ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 1009 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 1010 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1011 ; AVX2-NEXT: vzeroupper 1012 ; AVX2-NEXT: retq 1013 %cvt = sitofp <4 x i64> %a to <4 x float> 1014 ret <4 x float> %cvt 1015 } 1016 1017 define <8 x float> @sitofp_8i32_to_8f32(<8 x i32> %a) { 1018 ; SSE-LABEL: sitofp_8i32_to_8f32: 1019 ; SSE: # BB#0: 1020 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1021 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 1022 ; SSE-NEXT: retq 1023 ; 1024 ; AVX-LABEL: sitofp_8i32_to_8f32: 1025 ; AVX: # BB#0: 1026 ; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0 1027 ; AVX-NEXT: retq 1028 %cvt = sitofp <8 x i32> %a to <8 x float> 1029 ret <8 x float> %cvt 1030 } 1031 1032 define <8 x float> @sitofp_8i16_to_8f32(<8 x i16> %a) { 1033 ; SSE-LABEL: sitofp_8i16_to_8f32: 1034 ; SSE: # BB#0: 1035 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1036 ; SSE-NEXT: psrad $16, %xmm1 1037 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2 1038 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 1039 ; SSE-NEXT: psrad $16, %xmm0 1040 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1041 ; SSE-NEXT: movaps %xmm2, %xmm0 1042 ; SSE-NEXT: retq 1043 ; 1044 ; AVX1-LABEL: sitofp_8i16_to_8f32: 1045 ; AVX1: # BB#0: 1046 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1047 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1048 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1049 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1050 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1051 ; AVX1-NEXT: retq 1052 ; 1053 ; AVX2-LABEL: sitofp_8i16_to_8f32: 1054 ; AVX2: # BB#0: 1055 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 1056 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1057 ; AVX2-NEXT: retq 1058 %cvt = sitofp <8 x i16> %a to <8 x float> 1059 ret <8 x float> %cvt 1060 } 1061 1062 define <8 x float> @sitofp_8i8_to_8f32(<16 x i8> %a) { 1063 ; SSE-LABEL: sitofp_8i8_to_8f32: 1064 ; SSE: # BB#0: 1065 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1066 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1067 ; SSE-NEXT: psrad $24, %xmm1 1068 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2 1069 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1070 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1071 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1072 ; SSE-NEXT: psrad $24, %xmm0 1073 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1074 ; SSE-NEXT: movaps %xmm2, %xmm0 1075 ; SSE-NEXT: retq 1076 ; 1077 ; AVX1-LABEL: sitofp_8i8_to_8f32: 1078 ; AVX1: # BB#0: 1079 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1080 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1081 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1082 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1083 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1084 ; AVX1-NEXT: retq 1085 ; 1086 ; AVX2-LABEL: sitofp_8i8_to_8f32: 1087 ; AVX2: # BB#0: 1088 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1089 ; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 1090 ; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 1091 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1092 ; AVX2-NEXT: retq 1093 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1094 %cvt = sitofp <8 x i8> %shuf to <8 x float> 1095 ret <8 x float> %cvt 1096 } 1097 1098 define <8 x float> @sitofp_16i8_to_8f32(<16 x i8> %a) { 1099 ; SSE-LABEL: sitofp_16i8_to_8f32: 1100 ; SSE: # BB#0: 1101 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1102 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 1103 ; SSE-NEXT: psrad $24, %xmm1 1104 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm2 1105 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1106 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1107 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1108 ; SSE-NEXT: psrad $24, %xmm0 1109 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1110 ; SSE-NEXT: movaps %xmm2, %xmm0 1111 ; SSE-NEXT: retq 1112 ; 1113 ; AVX1-LABEL: sitofp_16i8_to_8f32: 1114 ; AVX1: # BB#0: 1115 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 1116 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1117 ; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 1118 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1119 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1120 ; AVX1-NEXT: retq 1121 ; 1122 ; AVX2-LABEL: sitofp_16i8_to_8f32: 1123 ; AVX2: # BB#0: 1124 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 1125 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 1126 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1127 ; AVX2-NEXT: retq 1128 %cvt = sitofp <16 x i8> %a to <16 x float> 1129 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1130 ret <8 x float> %shuf 1131 } 1132 1133 ; 1134 ; Unsigned Integer to Float 1135 ; 1136 1137 define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { 1138 ; SSE-LABEL: uitofp_2i64_to_4f32: 1139 ; SSE: # BB#0: 1140 ; SSE-NEXT: movdqa %xmm0, %xmm1 1141 ; SSE-NEXT: movd %xmm1, %rax 1142 ; SSE-NEXT: movl %eax, %ecx 1143 ; SSE-NEXT: andl $1, %ecx 1144 ; SSE-NEXT: testq %rax, %rax 1145 ; SSE-NEXT: js .LBB38_1 1146 ; SSE-NEXT: # BB#2: 1147 ; SSE-NEXT: xorps %xmm0, %xmm0 1148 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 1149 ; SSE-NEXT: jmp .LBB38_3 1150 ; SSE-NEXT: .LBB38_1: 1151 ; SSE-NEXT: shrq %rax 1152 ; SSE-NEXT: orq %rax, %rcx 1153 ; SSE-NEXT: xorps %xmm0, %xmm0 1154 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm0 1155 ; SSE-NEXT: addss %xmm0, %xmm0 1156 ; SSE-NEXT: .LBB38_3: 1157 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1158 ; SSE-NEXT: movd %xmm1, %rax 1159 ; SSE-NEXT: movl %eax, %ecx 1160 ; SSE-NEXT: andl $1, %ecx 1161 ; SSE-NEXT: testq %rax, %rax 1162 ; SSE-NEXT: js .LBB38_4 1163 ; SSE-NEXT: # BB#5: 1164 ; SSE-NEXT: xorps %xmm1, %xmm1 1165 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 1166 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1167 ; SSE-NEXT: retq 1168 ; SSE-NEXT: .LBB38_4: 1169 ; SSE-NEXT: shrq %rax 1170 ; SSE-NEXT: orq %rax, %rcx 1171 ; SSE-NEXT: xorps %xmm1, %xmm1 1172 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm1 1173 ; SSE-NEXT: addss %xmm1, %xmm1 1174 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1175 ; SSE-NEXT: retq 1176 ; 1177 ; AVX-LABEL: uitofp_2i64_to_4f32: 1178 ; AVX: # BB#0: 1179 ; AVX-NEXT: vpextrq $1, %xmm0, %rax 1180 ; AVX-NEXT: movl %eax, %ecx 1181 ; AVX-NEXT: andl $1, %ecx 1182 ; AVX-NEXT: testq %rax, %rax 1183 ; AVX-NEXT: js .LBB38_1 1184 ; AVX-NEXT: # BB#2: 1185 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1186 ; AVX-NEXT: jmp .LBB38_3 1187 ; AVX-NEXT: .LBB38_1: 1188 ; AVX-NEXT: shrq %rax 1189 ; AVX-NEXT: orq %rax, %rcx 1190 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 1191 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1 1192 ; AVX-NEXT: .LBB38_3: 1193 ; AVX-NEXT: vmovq %xmm0, %rax 1194 ; AVX-NEXT: movl %eax, %ecx 1195 ; AVX-NEXT: andl $1, %ecx 1196 ; AVX-NEXT: testq %rax, %rax 1197 ; AVX-NEXT: js .LBB38_4 1198 ; AVX-NEXT: # BB#5: 1199 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1200 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 1201 ; AVX-NEXT: jmp .LBB38_6 1202 ; AVX-NEXT: .LBB38_4: 1203 ; AVX-NEXT: shrq %rax 1204 ; AVX-NEXT: orq %rax, %rcx 1205 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1206 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 1207 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0 1208 ; AVX-NEXT: .LBB38_6: 1209 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 1210 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1211 ; AVX-NEXT: testq %rax, %rax 1212 ; AVX-NEXT: js .LBB38_8 1213 ; AVX-NEXT: # BB#7: 1214 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1215 ; AVX-NEXT: .LBB38_8: 1216 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 1217 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 1218 ; AVX-NEXT: retq 1219 %cvt = uitofp <2 x i64> %a to <2 x float> 1220 %ext = shufflevector <2 x float> %cvt, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1221 ret <4 x float> %ext 1222 } 1223 1224 define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { 1225 ; SSE-LABEL: uitofp_4i64_to_4f32_undef: 1226 ; SSE: # BB#0: 1227 ; SSE-NEXT: movdqa %xmm0, %xmm1 1228 ; SSE-NEXT: testq %rax, %rax 1229 ; SSE-NEXT: xorps %xmm2, %xmm2 1230 ; SSE-NEXT: js .LBB39_2 1231 ; SSE-NEXT: # BB#1: 1232 ; SSE-NEXT: xorps %xmm2, %xmm2 1233 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 1234 ; SSE-NEXT: .LBB39_2: 1235 ; SSE-NEXT: movd %xmm1, %rax 1236 ; SSE-NEXT: movl %eax, %ecx 1237 ; SSE-NEXT: andl $1, %ecx 1238 ; SSE-NEXT: testq %rax, %rax 1239 ; SSE-NEXT: js .LBB39_3 1240 ; SSE-NEXT: # BB#4: 1241 ; SSE-NEXT: xorps %xmm0, %xmm0 1242 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 1243 ; SSE-NEXT: jmp .LBB39_5 1244 ; SSE-NEXT: .LBB39_3: 1245 ; SSE-NEXT: shrq %rax 1246 ; SSE-NEXT: orq %rax, %rcx 1247 ; SSE-NEXT: xorps %xmm0, %xmm0 1248 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm0 1249 ; SSE-NEXT: addss %xmm0, %xmm0 1250 ; SSE-NEXT: .LBB39_5: 1251 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1252 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1253 ; SSE-NEXT: movd %xmm1, %rax 1254 ; SSE-NEXT: movl %eax, %ecx 1255 ; SSE-NEXT: andl $1, %ecx 1256 ; SSE-NEXT: testq %rax, %rax 1257 ; SSE-NEXT: js .LBB39_6 1258 ; SSE-NEXT: # BB#7: 1259 ; SSE-NEXT: xorps %xmm1, %xmm1 1260 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 1261 ; SSE-NEXT: jmp .LBB39_8 1262 ; SSE-NEXT: .LBB39_6: 1263 ; SSE-NEXT: shrq %rax 1264 ; SSE-NEXT: orq %rax, %rcx 1265 ; SSE-NEXT: xorps %xmm1, %xmm1 1266 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm1 1267 ; SSE-NEXT: addss %xmm1, %xmm1 1268 ; SSE-NEXT: .LBB39_8: 1269 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1270 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1271 ; SSE-NEXT: retq 1272 ; 1273 ; AVX-LABEL: uitofp_4i64_to_4f32_undef: 1274 ; AVX: # BB#0: 1275 ; AVX-NEXT: vpextrq $1, %xmm0, %rax 1276 ; AVX-NEXT: movl %eax, %ecx 1277 ; AVX-NEXT: andl $1, %ecx 1278 ; AVX-NEXT: testq %rax, %rax 1279 ; AVX-NEXT: js .LBB39_1 1280 ; AVX-NEXT: # BB#2: 1281 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1282 ; AVX-NEXT: jmp .LBB39_3 1283 ; AVX-NEXT: .LBB39_1: 1284 ; AVX-NEXT: shrq %rax 1285 ; AVX-NEXT: orq %rax, %rcx 1286 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 1287 ; AVX-NEXT: vaddss %xmm1, %xmm1, %xmm1 1288 ; AVX-NEXT: .LBB39_3: 1289 ; AVX-NEXT: vmovq %xmm0, %rax 1290 ; AVX-NEXT: movl %eax, %ecx 1291 ; AVX-NEXT: andl $1, %ecx 1292 ; AVX-NEXT: testq %rax, %rax 1293 ; AVX-NEXT: js .LBB39_4 1294 ; AVX-NEXT: # BB#5: 1295 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1296 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 1297 ; AVX-NEXT: jmp .LBB39_6 1298 ; AVX-NEXT: .LBB39_4: 1299 ; AVX-NEXT: shrq %rax 1300 ; AVX-NEXT: orq %rax, %rcx 1301 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 1302 ; AVX-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 1303 ; AVX-NEXT: vaddss %xmm0, %xmm0, %xmm0 1304 ; AVX-NEXT: .LBB39_6: 1305 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 1306 ; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1307 ; AVX-NEXT: testq %rax, %rax 1308 ; AVX-NEXT: js .LBB39_8 1309 ; AVX-NEXT: # BB#7: 1310 ; AVX-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1311 ; AVX-NEXT: .LBB39_8: 1312 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 1313 ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] 1314 ; AVX-NEXT: retq 1315 %ext = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1316 %cvt = uitofp <4 x i64> %ext to <4 x float> 1317 ret <4 x float> %cvt 1318 } 1319 1320 define <4 x float> @uitofp_4i32_to_4f32(<4 x i32> %a) { 1321 ; SSE-LABEL: uitofp_4i32_to_4f32: 1322 ; SSE: # BB#0: 1323 ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 1324 ; SSE-NEXT: pand %xmm0, %xmm1 1325 ; SSE-NEXT: por {{.*}}(%rip), %xmm1 1326 ; SSE-NEXT: psrld $16, %xmm0 1327 ; SSE-NEXT: por {{.*}}(%rip), %xmm0 1328 ; SSE-NEXT: addps {{.*}}(%rip), %xmm0 1329 ; SSE-NEXT: addps %xmm1, %xmm0 1330 ; SSE-NEXT: retq 1331 ; 1332 ; AVX1-LABEL: uitofp_4i32_to_4f32: 1333 ; AVX1: # BB#0: 1334 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 1335 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 1336 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 1337 ; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 1338 ; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 1339 ; AVX1-NEXT: retq 1340 ; 1341 ; AVX2-LABEL: uitofp_4i32_to_4f32: 1342 ; AVX2: # BB#0: 1343 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1 1344 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1345 ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 1346 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 1347 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 1348 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 1349 ; AVX2-NEXT: vaddps %xmm2, %xmm0, %xmm0 1350 ; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0 1351 ; AVX2-NEXT: retq 1352 %cvt = uitofp <4 x i32> %a to <4 x float> 1353 ret <4 x float> %cvt 1354 } 1355 1356 define <4 x float> @uitofp_4i16_to_4f32(<8 x i16> %a) { 1357 ; SSE-LABEL: uitofp_4i16_to_4f32: 1358 ; SSE: # BB#0: 1359 ; SSE-NEXT: pxor %xmm1, %xmm1 1360 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1361 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1362 ; SSE-NEXT: retq 1363 ; 1364 ; AVX-LABEL: uitofp_4i16_to_4f32: 1365 ; AVX: # BB#0: 1366 ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1367 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1368 ; AVX-NEXT: retq 1369 %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1370 %cvt = uitofp <4 x i16> %shuf to <4 x float> 1371 ret <4 x float> %cvt 1372 } 1373 1374 define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) { 1375 ; SSE-LABEL: uitofp_8i16_to_4f32: 1376 ; SSE: # BB#0: 1377 ; SSE-NEXT: pxor %xmm1, %xmm1 1378 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1379 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1380 ; SSE-NEXT: retq 1381 ; 1382 ; AVX1-LABEL: uitofp_8i16_to_4f32: 1383 ; AVX1: # BB#0: 1384 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1385 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1386 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1387 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1388 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1389 ; AVX1-NEXT: vzeroupper 1390 ; AVX1-NEXT: retq 1391 ; 1392 ; AVX2-LABEL: uitofp_8i16_to_4f32: 1393 ; AVX2: # BB#0: 1394 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1395 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1396 ; AVX2-NEXT: vzeroupper 1397 ; AVX2-NEXT: retq 1398 %cvt = uitofp <8 x i16> %a to <8 x float> 1399 %shuf = shufflevector <8 x float> %cvt, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1400 ret <4 x float> %shuf 1401 } 1402 1403 define <4 x float> @uitofp_4i8_to_4f32(<16 x i8> %a) { 1404 ; SSE-LABEL: uitofp_4i8_to_4f32: 1405 ; SSE: # BB#0: 1406 ; SSE-NEXT: pxor %xmm1, %xmm1 1407 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1408 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1409 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1410 ; SSE-NEXT: retq 1411 ; 1412 ; AVX-LABEL: uitofp_4i8_to_4f32: 1413 ; AVX: # BB#0: 1414 ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1415 ; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 1416 ; AVX-NEXT: retq 1417 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1418 %cvt = uitofp <4 x i8> %shuf to <4 x float> 1419 ret <4 x float> %cvt 1420 } 1421 1422 define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) { 1423 ; SSE-LABEL: uitofp_16i8_to_4f32: 1424 ; SSE: # BB#0: 1425 ; SSE-NEXT: pxor %xmm1, %xmm1 1426 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1427 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1428 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1429 ; SSE-NEXT: retq 1430 ; 1431 ; AVX1-LABEL: uitofp_16i8_to_4f32: 1432 ; AVX1: # BB#0: 1433 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1434 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1435 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1436 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1437 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1438 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1439 ; AVX1-NEXT: vzeroupper 1440 ; AVX1-NEXT: retq 1441 ; 1442 ; AVX2-LABEL: uitofp_16i8_to_4f32: 1443 ; AVX2: # BB#0: 1444 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1445 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1446 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1447 ; AVX2-NEXT: vzeroupper 1448 ; AVX2-NEXT: retq 1449 %cvt = uitofp <16 x i8> %a to <16 x float> 1450 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1451 ret <4 x float> %shuf 1452 } 1453 1454 define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { 1455 ; SSE-LABEL: uitofp_4i64_to_4f32: 1456 ; SSE: # BB#0: 1457 ; SSE-NEXT: movd %xmm1, %rax 1458 ; SSE-NEXT: movl %eax, %ecx 1459 ; SSE-NEXT: andl $1, %ecx 1460 ; SSE-NEXT: testq %rax, %rax 1461 ; SSE-NEXT: js .LBB45_1 1462 ; SSE-NEXT: # BB#2: 1463 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 1464 ; SSE-NEXT: jmp .LBB45_3 1465 ; SSE-NEXT: .LBB45_1: 1466 ; SSE-NEXT: shrq %rax 1467 ; SSE-NEXT: orq %rax, %rcx 1468 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm3 1469 ; SSE-NEXT: addss %xmm3, %xmm3 1470 ; SSE-NEXT: .LBB45_3: 1471 ; SSE-NEXT: movd %xmm0, %rax 1472 ; SSE-NEXT: movl %eax, %ecx 1473 ; SSE-NEXT: andl $1, %ecx 1474 ; SSE-NEXT: testq %rax, %rax 1475 ; SSE-NEXT: js .LBB45_4 1476 ; SSE-NEXT: # BB#5: 1477 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 1478 ; SSE-NEXT: jmp .LBB45_6 1479 ; SSE-NEXT: .LBB45_4: 1480 ; SSE-NEXT: shrq %rax 1481 ; SSE-NEXT: orq %rax, %rcx 1482 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm2 1483 ; SSE-NEXT: addss %xmm2, %xmm2 1484 ; SSE-NEXT: .LBB45_6: 1485 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 1486 ; SSE-NEXT: movd %xmm1, %rax 1487 ; SSE-NEXT: movl %eax, %ecx 1488 ; SSE-NEXT: andl $1, %ecx 1489 ; SSE-NEXT: testq %rax, %rax 1490 ; SSE-NEXT: js .LBB45_7 1491 ; SSE-NEXT: # BB#8: 1492 ; SSE-NEXT: xorps %xmm1, %xmm1 1493 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 1494 ; SSE-NEXT: jmp .LBB45_9 1495 ; SSE-NEXT: .LBB45_7: 1496 ; SSE-NEXT: shrq %rax 1497 ; SSE-NEXT: orq %rax, %rcx 1498 ; SSE-NEXT: xorps %xmm1, %xmm1 1499 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm1 1500 ; SSE-NEXT: addss %xmm1, %xmm1 1501 ; SSE-NEXT: .LBB45_9: 1502 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 1503 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1504 ; SSE-NEXT: movd %xmm0, %rax 1505 ; SSE-NEXT: movl %eax, %ecx 1506 ; SSE-NEXT: andl $1, %ecx 1507 ; SSE-NEXT: testq %rax, %rax 1508 ; SSE-NEXT: js .LBB45_10 1509 ; SSE-NEXT: # BB#11: 1510 ; SSE-NEXT: xorps %xmm0, %xmm0 1511 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 1512 ; SSE-NEXT: jmp .LBB45_12 1513 ; SSE-NEXT: .LBB45_10: 1514 ; SSE-NEXT: shrq %rax 1515 ; SSE-NEXT: orq %rax, %rcx 1516 ; SSE-NEXT: xorps %xmm0, %xmm0 1517 ; SSE-NEXT: cvtsi2ssq %rcx, %xmm0 1518 ; SSE-NEXT: addss %xmm0, %xmm0 1519 ; SSE-NEXT: .LBB45_12: 1520 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1521 ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1522 ; SSE-NEXT: movaps %xmm2, %xmm0 1523 ; SSE-NEXT: retq 1524 ; 1525 ; AVX1-LABEL: uitofp_4i64_to_4f32: 1526 ; AVX1: # BB#0: 1527 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1528 ; AVX1-NEXT: movl %eax, %ecx 1529 ; AVX1-NEXT: andl $1, %ecx 1530 ; AVX1-NEXT: testq %rax, %rax 1531 ; AVX1-NEXT: js .LBB45_1 1532 ; AVX1-NEXT: # BB#2: 1533 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1534 ; AVX1-NEXT: jmp .LBB45_3 1535 ; AVX1-NEXT: .LBB45_1: 1536 ; AVX1-NEXT: shrq %rax 1537 ; AVX1-NEXT: orq %rax, %rcx 1538 ; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 1539 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 1540 ; AVX1-NEXT: .LBB45_3: 1541 ; AVX1-NEXT: vmovq %xmm0, %rax 1542 ; AVX1-NEXT: movl %eax, %ecx 1543 ; AVX1-NEXT: andl $1, %ecx 1544 ; AVX1-NEXT: testq %rax, %rax 1545 ; AVX1-NEXT: js .LBB45_4 1546 ; AVX1-NEXT: # BB#5: 1547 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1548 ; AVX1-NEXT: jmp .LBB45_6 1549 ; AVX1-NEXT: .LBB45_4: 1550 ; AVX1-NEXT: shrq %rax 1551 ; AVX1-NEXT: orq %rax, %rcx 1552 ; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 1553 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 1554 ; AVX1-NEXT: .LBB45_6: 1555 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1556 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1557 ; AVX1-NEXT: vmovq %xmm0, %rax 1558 ; AVX1-NEXT: movl %eax, %ecx 1559 ; AVX1-NEXT: andl $1, %ecx 1560 ; AVX1-NEXT: testq %rax, %rax 1561 ; AVX1-NEXT: js .LBB45_7 1562 ; AVX1-NEXT: # BB#8: 1563 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1564 ; AVX1-NEXT: jmp .LBB45_9 1565 ; AVX1-NEXT: .LBB45_7: 1566 ; AVX1-NEXT: shrq %rax 1567 ; AVX1-NEXT: orq %rax, %rcx 1568 ; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 1569 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 1570 ; AVX1-NEXT: .LBB45_9: 1571 ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1572 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1573 ; AVX1-NEXT: movl %eax, %ecx 1574 ; AVX1-NEXT: andl $1, %ecx 1575 ; AVX1-NEXT: testq %rax, %rax 1576 ; AVX1-NEXT: js .LBB45_10 1577 ; AVX1-NEXT: # BB#11: 1578 ; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 1579 ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 1580 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1581 ; AVX1-NEXT: vzeroupper 1582 ; AVX1-NEXT: retq 1583 ; AVX1-NEXT: .LBB45_10: 1584 ; AVX1-NEXT: shrq %rax 1585 ; AVX1-NEXT: orq %rax, %rcx 1586 ; AVX1-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 1587 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 1588 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1589 ; AVX1-NEXT: vzeroupper 1590 ; AVX1-NEXT: retq 1591 ; 1592 ; AVX2-LABEL: uitofp_4i64_to_4f32: 1593 ; AVX2: # BB#0: 1594 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1595 ; AVX2-NEXT: movl %eax, %ecx 1596 ; AVX2-NEXT: andl $1, %ecx 1597 ; AVX2-NEXT: testq %rax, %rax 1598 ; AVX2-NEXT: js .LBB45_1 1599 ; AVX2-NEXT: # BB#2: 1600 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm1 1601 ; AVX2-NEXT: jmp .LBB45_3 1602 ; AVX2-NEXT: .LBB45_1: 1603 ; AVX2-NEXT: shrq %rax 1604 ; AVX2-NEXT: orq %rax, %rcx 1605 ; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm1 1606 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1 1607 ; AVX2-NEXT: .LBB45_3: 1608 ; AVX2-NEXT: vmovq %xmm0, %rax 1609 ; AVX2-NEXT: movl %eax, %ecx 1610 ; AVX2-NEXT: andl $1, %ecx 1611 ; AVX2-NEXT: testq %rax, %rax 1612 ; AVX2-NEXT: js .LBB45_4 1613 ; AVX2-NEXT: # BB#5: 1614 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1615 ; AVX2-NEXT: jmp .LBB45_6 1616 ; AVX2-NEXT: .LBB45_4: 1617 ; AVX2-NEXT: shrq %rax 1618 ; AVX2-NEXT: orq %rax, %rcx 1619 ; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 1620 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 1621 ; AVX2-NEXT: .LBB45_6: 1622 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 1623 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1624 ; AVX2-NEXT: vmovq %xmm0, %rax 1625 ; AVX2-NEXT: movl %eax, %ecx 1626 ; AVX2-NEXT: andl $1, %ecx 1627 ; AVX2-NEXT: testq %rax, %rax 1628 ; AVX2-NEXT: js .LBB45_7 1629 ; AVX2-NEXT: # BB#8: 1630 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm2 1631 ; AVX2-NEXT: jmp .LBB45_9 1632 ; AVX2-NEXT: .LBB45_7: 1633 ; AVX2-NEXT: shrq %rax 1634 ; AVX2-NEXT: orq %rax, %rcx 1635 ; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm2 1636 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 1637 ; AVX2-NEXT: .LBB45_9: 1638 ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 1639 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1640 ; AVX2-NEXT: movl %eax, %ecx 1641 ; AVX2-NEXT: andl $1, %ecx 1642 ; AVX2-NEXT: testq %rax, %rax 1643 ; AVX2-NEXT: js .LBB45_10 1644 ; AVX2-NEXT: # BB#11: 1645 ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 1646 ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm0, %xmm0 1647 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1648 ; AVX2-NEXT: vzeroupper 1649 ; AVX2-NEXT: retq 1650 ; AVX2-NEXT: .LBB45_10: 1651 ; AVX2-NEXT: shrq %rax 1652 ; AVX2-NEXT: orq %rax, %rcx 1653 ; AVX2-NEXT: vcvtsi2ssq %rcx, %xmm0, %xmm0 1654 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0 1655 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1656 ; AVX2-NEXT: vzeroupper 1657 ; AVX2-NEXT: retq 1658 %cvt = uitofp <4 x i64> %a to <4 x float> 1659 ret <4 x float> %cvt 1660 } 1661 1662 define <8 x float> @uitofp_8i32_to_8f32(<8 x i32> %a) { 1663 ; SSE-LABEL: uitofp_8i32_to_8f32: 1664 ; SSE: # BB#0: 1665 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [65535,65535,65535,65535] 1666 ; SSE-NEXT: movdqa %xmm0, %xmm3 1667 ; SSE-NEXT: pand %xmm2, %xmm3 1668 ; SSE-NEXT: movdqa {{.*#+}} xmm4 = [1258291200,1258291200,1258291200,1258291200] 1669 ; SSE-NEXT: por %xmm4, %xmm3 1670 ; SSE-NEXT: psrld $16, %xmm0 1671 ; SSE-NEXT: movdqa {{.*#+}} xmm5 = [1392508928,1392508928,1392508928,1392508928] 1672 ; SSE-NEXT: por %xmm5, %xmm0 1673 ; SSE-NEXT: movaps {{.*#+}} xmm6 = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11] 1674 ; SSE-NEXT: addps %xmm6, %xmm0 1675 ; SSE-NEXT: addps %xmm3, %xmm0 1676 ; SSE-NEXT: pand %xmm1, %xmm2 1677 ; SSE-NEXT: por %xmm4, %xmm2 1678 ; SSE-NEXT: psrld $16, %xmm1 1679 ; SSE-NEXT: por %xmm5, %xmm1 1680 ; SSE-NEXT: addps %xmm6, %xmm1 1681 ; SSE-NEXT: addps %xmm2, %xmm1 1682 ; SSE-NEXT: retq 1683 ; 1684 ; AVX1-LABEL: uitofp_8i32_to_8f32: 1685 ; AVX1: # BB#0: 1686 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm1 1687 ; AVX1-NEXT: vcvtdq2ps %ymm1, %ymm1 1688 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 1689 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1690 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 1691 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 1692 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1693 ; AVX1-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 1694 ; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0 1695 ; AVX1-NEXT: retq 1696 ; 1697 ; AVX2-LABEL: uitofp_8i32_to_8f32: 1698 ; AVX2: # BB#0: 1699 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 1700 ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1701 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 1702 ; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 1703 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] 1704 ; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 1705 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1706 ; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0 1707 ; AVX2-NEXT: retq 1708 %cvt = uitofp <8 x i32> %a to <8 x float> 1709 ret <8 x float> %cvt 1710 } 1711 1712 define <8 x float> @uitofp_8i16_to_8f32(<8 x i16> %a) { 1713 ; SSE-LABEL: uitofp_8i16_to_8f32: 1714 ; SSE: # BB#0: 1715 ; SSE-NEXT: pxor %xmm1, %xmm1 1716 ; SSE-NEXT: movdqa %xmm0, %xmm2 1717 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 1718 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2 1719 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1720 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1721 ; SSE-NEXT: movaps %xmm2, %xmm0 1722 ; SSE-NEXT: retq 1723 ; 1724 ; AVX1-LABEL: uitofp_8i16_to_8f32: 1725 ; AVX1: # BB#0: 1726 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1727 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1728 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1729 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1730 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1731 ; AVX1-NEXT: retq 1732 ; 1733 ; AVX2-LABEL: uitofp_8i16_to_8f32: 1734 ; AVX2: # BB#0: 1735 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1736 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1737 ; AVX2-NEXT: retq 1738 %cvt = uitofp <8 x i16> %a to <8 x float> 1739 ret <8 x float> %cvt 1740 } 1741 1742 define <8 x float> @uitofp_8i8_to_8f32(<16 x i8> %a) { 1743 ; SSE-LABEL: uitofp_8i8_to_8f32: 1744 ; SSE: # BB#0: 1745 ; SSE-NEXT: pxor %xmm1, %xmm1 1746 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1747 ; SSE-NEXT: movdqa %xmm0, %xmm2 1748 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 1749 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2 1750 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1751 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1752 ; SSE-NEXT: movaps %xmm2, %xmm0 1753 ; SSE-NEXT: retq 1754 ; 1755 ; AVX1-LABEL: uitofp_8i8_to_8f32: 1756 ; AVX1: # BB#0: 1757 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1758 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 1759 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] 1760 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 1761 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1762 ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 1763 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1764 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1765 ; AVX1-NEXT: retq 1766 ; 1767 ; AVX2-LABEL: uitofp_8i8_to_8f32: 1768 ; AVX2: # BB#0: 1769 ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1770 ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 1771 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1772 ; AVX2-NEXT: retq 1773 %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1774 %cvt = uitofp <8 x i8> %shuf to <8 x float> 1775 ret <8 x float> %cvt 1776 } 1777 1778 define <8 x float> @uitofp_16i8_to_8f32(<16 x i8> %a) { 1779 ; SSE-LABEL: uitofp_16i8_to_8f32: 1780 ; SSE: # BB#0: 1781 ; SSE-NEXT: pxor %xmm1, %xmm1 1782 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1783 ; SSE-NEXT: movdqa %xmm0, %xmm2 1784 ; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 1785 ; SSE-NEXT: cvtdq2ps %xmm2, %xmm2 1786 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1787 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm1 1788 ; SSE-NEXT: movaps %xmm2, %xmm0 1789 ; SSE-NEXT: retq 1790 ; 1791 ; AVX1-LABEL: uitofp_16i8_to_8f32: 1792 ; AVX1: # BB#0: 1793 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1794 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1795 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1796 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1797 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1798 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1799 ; AVX1-NEXT: retq 1800 ; 1801 ; AVX2-LABEL: uitofp_16i8_to_8f32: 1802 ; AVX2: # BB#0: 1803 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1804 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1805 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1806 ; AVX2-NEXT: retq 1807 %cvt = uitofp <16 x i8> %a to <16 x float> 1808 %shuf = shufflevector <16 x float> %cvt, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1809 ret <8 x float> %shuf 1810 } 1811 1812 ; 1813 ; Aggregates 1814 ; 1815 1816 %Arguments = type <{ <8 x i8>, <8 x i16>, <8 x float>* }> 1817 define void @aggregate_sitofp_8i16_to_8f32(%Arguments* nocapture readonly %a0) { 1818 ; SSE-LABEL: aggregate_sitofp_8i16_to_8f32: 1819 ; SSE: # BB#0: 1820 ; SSE-NEXT: movq 24(%rdi), %rax 1821 ; SSE-NEXT: movdqu 8(%rdi), %xmm0 1822 ; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1823 ; SSE-NEXT: psrad $16, %xmm1 1824 ; SSE-NEXT: cvtdq2ps %xmm1, %xmm1 1825 ; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] 1826 ; SSE-NEXT: psrad $16, %xmm0 1827 ; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 1828 ; SSE-NEXT: movaps %xmm0, 16(%rax) 1829 ; SSE-NEXT: movaps %xmm1, (%rax) 1830 ; SSE-NEXT: retq 1831 ; 1832 ; AVX1-LABEL: aggregate_sitofp_8i16_to_8f32: 1833 ; AVX1: # BB#0: 1834 ; AVX1-NEXT: movq 24(%rdi), %rax 1835 ; AVX1-NEXT: vmovdqu 8(%rdi), %xmm0 1836 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 1837 ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1838 ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 1839 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1840 ; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0 1841 ; AVX1-NEXT: vmovaps %ymm0, (%rax) 1842 ; AVX1-NEXT: vzeroupper 1843 ; AVX1-NEXT: retq 1844 ; 1845 ; AVX2-LABEL: aggregate_sitofp_8i16_to_8f32: 1846 ; AVX2: # BB#0: 1847 ; AVX2-NEXT: movq 24(%rdi), %rax 1848 ; AVX2-NEXT: vpmovsxwd 8(%rdi), %ymm0 1849 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 1850 ; AVX2-NEXT: vmovaps %ymm0, (%rax) 1851 ; AVX2-NEXT: vzeroupper 1852 ; AVX2-NEXT: retq 1853 %1 = load %Arguments, %Arguments* %a0, align 1 1854 %2 = extractvalue %Arguments %1, 1 1855 %3 = extractvalue %Arguments %1, 2 1856 %4 = sitofp <8 x i16> %2 to <8 x float> 1857 store <8 x float> %4, <8 x float>* %3, align 32 1858 ret void 1859 } 1860