; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE

; Naming scheme used below - merge_<vector type>_<scalar type>_<lanes>, where
; <lanes> has one character per result lane. A hex digit is the element index
; loaded from the base pointer, 'u' is a lane left undef and 'z' is a lane
; that is zero.

; Two adjacent doubles (ptr[2], ptr[3]) - expected as one unaligned 128-bit load.
define <2 x double> @merge_2f64_f64_23(double* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2f64_f64_23:
; SSE:       # BB#0:
; SSE-NEXT:    movups 16(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_2f64_f64_23:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 16(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_2f64_f64_23:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 16(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <2 x double> undef, double %val0, i32 0
  %res1 = insertelement <2 x double> %res0, double %val1, i32 1
  ret <2 x double> %res1
}

; Two adjacent i64 values (ptr[1], ptr[2]) - expected as one unaligned 128-bit load.
define <2 x i64> @merge_2i64_i64_12(i64* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2i64_i64_12:
; SSE:       # BB#0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_2i64_i64_12:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_2i64_i64_12:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 8(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
  ret <2 x i64> %res1
}

; Four adjacent floats (ptr[2] through ptr[5]) - expected as one unaligned 128-bit load.
define <4 x float> @merge_4f32_f32_2345(float* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_2345:
; SSE:       # BB#0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_2345:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_2345:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 8(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 2
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr2 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 5
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val2 = load float, float* %ptr2
  %val3 = load float, float* %ptr3
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
  ret <4 x float> %res3
}

; ptr[3] in lane 0, an explicit zero in lane 1, lanes 2-3 undef - expected as
; a single zero-extending scalar load.
define <4 x float> @merge_4f32_f32_3zuu(float* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_3zuu:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_3zuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_3zuu:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
  %val0 = load float, float* %ptr0
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  ret <4 x float> %res1
}

; ptr[3], ptr[4] in lanes 0-1, upper lanes undef - expected as one 64-bit load.
define <4 x float> @merge_4f32_f32_34uu(float* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_34uu:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_34uu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_34uu:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  ret <4 x float> %res1
}

; ptr[3], ptr[4], zero, ptr[6] - expected as a 128-bit load starting at ptr[3]
; with lane 2 replaced by zero via a shuffle or blend.
define <4 x float> @merge_4f32_f32_34z6(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_34z6:
; SSE2:       # BB#0:
; SSE2-NEXT:    movups 12(%rdi), %xmm0
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_34z6:
; SSE41:       # BB#0:
; SSE41-NEXT:    movups 12(%rdi), %xmm1
; SSE41-NEXT:    xorps %xmm0, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_34z6:
; AVX:       # BB#0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = mem[0,1],xmm0[2],mem[3]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_34z6:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 12(%eax), %xmm1
; X32-SSE-NEXT:    xorps %xmm0, %xmm0
; X32-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 6
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res3 = insertelement <4 x float> %res1, float %val3, i32 3
  ret <4 x float> %res3
}

; ptr[4], ptr[5] inserted over a zeroinitializer base - expected as one
; zero-extending 64-bit load.
define <4 x float> @merge_4f32_f32_45zz(float* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_45zz:
; SSE:       # BB#0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_45zz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_45zz:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <4 x float> zeroinitializer, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  ret <4 x float> %res1
}

; ptr[0], ptr[1], ptr[2], undef - the checks currently expect separate loads
; combined with unpack/insert instructions rather than one 128-bit load.
define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_012u:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_012u:
; SSE41:       # BB#0:
; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_012u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_012u:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 1
  %ptr2 = getelementptr inbounds float, float* %ptr, i64 2
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val2 = load float, float* %ptr2
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float undef, i32 3
  ret <4 x float> %res3
}

; ptr[0], ptr[1], ptr[9], undef - lane 2 is not adjacent to lanes 0-1, so a
; full-width merge is not possible; at most the first two loads pair up.
define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_019u:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_019u:
; SSE41:       # BB#0:
; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_019u:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_019u:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 1
  %ptr2 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val2 = load float, float* %ptr2
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float undef, i32 3
  ret <4 x float> %res3
}

; ptr[2], ptr[3], undef, ptr[5] - the undef lane lets this become one
; unaligned 128-bit load.
define <4 x i32> @merge_4i32_i32_23u5(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_23u5:
; SSE:       # BB#0:
; SSE-NEXT:    movups 8(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_23u5:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 8(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4i32_i32_23u5:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 8(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val3 = load i32, i32* %ptr3
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <4 x i32> %res1, i32 %val3, i32 3
  ret <4 x i32> %res3
}

; ptr[3] in lane 0, an explicit zero in lane 1, lanes 2-3 undef - expected as
; a single zero-extending 32-bit load.
define <4 x i32> @merge_4i32_i32_3zuu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_3zuu:
; SSE:       # BB#0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_3zuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4i32_i32_3zuu:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
  %val0 = load i32, i32* %ptr0
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 0, i32 1
  ret <4 x i32> %res1
}

; ptr[3], ptr[4] in lanes 0-1, upper lanes undef - expected as one 64-bit load.
define <4 x i32> @merge_4i32_i32_34uu(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_34uu:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_34uu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4i32_i32_34uu:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 4
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <4 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  ret <4 x i32> %res1
}

; ptr[4], ptr[5] inserted over a zeroinitializer base - expected as one
; zero-extending 64-bit load.
define <4 x i32> @merge_4i32_i32_45zz(i32* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4i32_i32_45zz:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4i32_i32_45zz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4i32_i32_45zz:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 4
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <4 x i32> zeroinitializer, i32 %val0, i32 0
  %res1 = insertelement <4 x i32> %res0, i32 %val1, i32 1
  ret <4 x i32> %res1
}

; Eight i16 lanes covering ptr[2] through ptr[9] with lanes 2 and 6 undef -
; expected as one unaligned 128-bit load.
define <8 x i16> @merge_8i16_i16_23u567u9(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_23u567u9:
; SSE:       # BB#0:
; SSE-NEXT:    movups 4(%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_8i16_i16_23u567u9:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups 4(%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_8i16_i16_23u567u9:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups 4(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr4 = getelementptr inbounds i16, i16* %ptr, i64 6
  %ptr5 = getelementptr inbounds i16, i16* %ptr, i64 7
  %ptr7 = getelementptr inbounds i16, i16* %ptr, i64 9
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %val4 = load i16, i16* %ptr4
  %val5 = load i16, i16* %ptr5
  %val7 = load i16, i16* %ptr7
  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
  %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
  %res4 = insertelement <8 x i16> %res3, i16 %val4, i32 4
  %res5 = insertelement <8 x i16> %res4, i16 %val5, i32 5
  %res7 = insertelement <8 x i16> %res5, i16 %val7, i32 7
  ret <8 x i16> %res7
}

; ptr[3], ptr[4] in lanes 0-1, remaining lanes undef - expected as one 32-bit load.
define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_34uuuuuu:
; SSE:       # BB#0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_8i16_i16_34uuuuuu:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_8i16_i16_34uuuuuu:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 4
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
  ret <8 x i16> %res1
}

; ptr[4], ptr[5], undef, ptr[7] followed by four zero lanes - expected as one
; zero-extending 64-bit load.
define <8 x i16> @merge_8i16_i16_45u7zzzz(i16* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_8i16_i16_45u7zzzz:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_8i16_i16_45u7zzzz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_8i16_i16_45u7zzzz:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <8 x i16> undef, i16 %val0, i32 0
  %res1 = insertelement <8 x i16> %res0, i16 %val1, i32 1
  %res3 = insertelement <8 x i16> %res1, i16 %val3, i32 3
  %res4 = insertelement <8 x i16> %res3, i16 0, i32 4
  %res5 = insertelement <8 x i16> %res4, i16 0, i32 5
  %res6 = insertelement <8 x i16> %res5, i16 0, i32 6
  %res7 = insertelement <8 x i16> %res6, i16 0, i32 7
  ret <8 x i16> %res7
}

; Sixteen i8 lanes covering ptr[0] through ptr[15] with lanes 2 and 14 undef -
; expected as one unaligned 128-bit load.
define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(i8* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; SSE:       # BB#0:
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; AVX:       # BB#0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_16i8_i8_01u3456789ABCDuF:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movups (%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
  %ptr4 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr5 = getelementptr inbounds i8, i8* %ptr, i64 5
  %ptr6 = getelementptr inbounds i8, i8* %ptr, i64 6
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 7
  %ptr8 = getelementptr inbounds i8, i8* %ptr, i64 8
  %ptr9 = getelementptr inbounds i8, i8* %ptr, i64 9
  %ptrA = getelementptr inbounds i8, i8* %ptr, i64 10
  %ptrB = getelementptr inbounds i8, i8* %ptr, i64 11
  %ptrC = getelementptr inbounds i8, i8* %ptr, i64 12
  %ptrD = getelementptr inbounds i8, i8* %ptr, i64 13
  %ptrF = getelementptr inbounds i8, i8* %ptr, i64 15
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %val4 = load i8, i8* %ptr4
  %val5 = load i8, i8* %ptr5
  %val6 = load i8, i8* %ptr6
  %val7 = load i8, i8* %ptr7
  %val8 = load i8, i8* %ptr8
  %val9 = load i8, i8* %ptr9
  %valA = load i8, i8* %ptrA
  %valB = load i8, i8* %ptrB
  %valC = load i8, i8* %ptrC
  %valD = load i8, i8* %ptrD
  %valF = load i8, i8* %ptrF
  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
  %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
  %res4 = insertelement <16 x i8> %res3, i8 %val4, i32 4
  %res5 = insertelement <16 x i8> %res4, i8 %val5, i32 5
  %res6 = insertelement <16 x i8> %res5, i8 %val6, i32 6
  %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
  %res8 = insertelement <16 x i8> %res7, i8 %val8, i32 8
  %res9 = insertelement <16 x i8> %res8, i8 %val9, i32 9
  %resA = insertelement <16 x i8> %res9, i8 %valA, i32 10
  %resB = insertelement <16 x i8> %resA, i8 %valB, i32 11
  %resC = insertelement <16 x i8> %resB, i8 %valC, i32 12
  %resD = insertelement <16 x i8> %resC, i8 %valD, i32 13
  %resF = insertelement <16 x i8> %resD, i8 %valF, i32 15
  ret <16 x i8> %resF
}

; ptr[0], ptr[1], ptr[3] in the low four lanes, zeros in lanes 6-7 and 13-15,
; everything else undef - expected as one zero-extending 32-bit load.
define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; SSE:       # BB#0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
  %res3 = insertelement <16 x i8> %res1, i8 %val3, i32 3
  %res6 = insertelement <16 x i8> %res3, i8 0, i32 6
  %res7 = insertelement <16 x i8> %res6, i8 0, i32 7
  %resD = insertelement <16 x i8> %res7, i8 0, i32 13
  %resE = insertelement <16 x i8> %resD, i8 0, i32 14
  %resF = insertelement <16 x i8> %resE, i8 0, i32 15
  ret <16 x i8> %resF
}

; ptr[0] through ptr[3] plus ptr[6], ptr[7] in the low eight lanes, zeros in
; lanes 13-15, everything else undef - expected as one zero-extending 64-bit load.
define <16 x i8> @merge_16i8_i8_0123uu67uuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_16i8_i8_0123uu67uuuuuzzz:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr2 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 3
  %ptr6 = getelementptr inbounds i8, i8* %ptr, i64 6
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 7
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val2 = load i8, i8* %ptr2
  %val3 = load i8, i8* %ptr3
  %val6 = load i8, i8* %ptr6
  %val7 = load i8, i8* %ptr7
  %res0 = insertelement <16 x i8> undef, i8 %val0, i32 0
  %res1 = insertelement <16 x i8> %res0, i8 %val1, i32 1
  %res2 = insertelement <16 x i8> %res1, i8 %val2, i32 2
  %res3 = insertelement <16 x i8> %res2, i8 %val3, i32 3
  %res6 = insertelement <16 x i8> %res3, i8 %val6, i32 6
  %res7 = insertelement <16 x i8> %res6, i8 %val7, i32 7
  %resD = insertelement <16 x i8> %res7, i8 0, i32 13
  %resE = insertelement <16 x i8> %resD, i8 0, i32 14
  %resF = insertelement <16 x i8> %resE, i8 0, i32 15
  ret <16 x i8> %resF
}

; src[0] is splatted, shifted right by a vector whose only defined lane is 0,
; then masked so that only lane 0 survives - expected to reduce to one
; zero-extending scalar load followed by an aligned 128-bit store.
define void @merge_4i32_i32_combine(<4 x i32>* %dst, i32* %src) {
; SSE-LABEL: merge_4i32_i32_combine:
; SSE:       # BB#0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: merge_4i32_i32_combine:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT:    vmovaps %xmm0, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: merge_4i32_i32_combine:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: merge_4i32_i32_combine:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX512F-NEXT:    retq
;
; X32-SSE-LABEL: merge_4i32_i32_combine:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    movaps %xmm0, (%eax)
; X32-SSE-NEXT:    retl
  %1 = getelementptr i32, i32* %src, i32 0
  %2 = load i32, i32* %1
  %3 = insertelement <4 x i32> undef, i32 %2, i32 0
  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
  %5 = lshr <4 x i32> %4, <i32 0, i32 undef, i32 undef, i32 undef>
  %6 = and <4 x i32> %5, <i32 -1, i32 0, i32 0, i32 0>
  store <4 x i32> %6, <4 x i32>* %dst
  ret void
}

;
; Consecutive loads that include any volatile load must not be combined.
;

; Both loads are volatile, so the checks expect them to stay separate.
define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_2i64_i64_12_volatile:
; SSE:       # BB#0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_2i64_i64_12_volatile:
; AVX:       # BB#0:
; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_2i64_i64_12_volatile:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    pinsrd $1, 12(%eax), %xmm0
; X32-SSE-NEXT:    pinsrd $2, 16(%eax), %xmm0
; X32-SSE-NEXT:    pinsrd $3, 20(%eax), %xmm0
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
  %val0 = load volatile i64, i64* %ptr0
  %val1 = load volatile i64, i64* %ptr1
  %res0 = insertelement <2 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <2 x i64> %res0, i64 %val1, i32 1
  ret <2 x i64> %res1
}

; Only the first of the four loads is volatile; the checks expect every
; element to be loaded individually.
define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable noinline ssp {
; SSE2-LABEL: merge_4f32_f32_2345_volatile:
; SSE2:       # BB#0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT:    movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE2-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: merge_4f32_f32_2345_volatile:
; SSE41:       # BB#0:
; SSE41-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_2345_volatile:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_2345_volatile:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; X32-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; X32-SSE-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 2
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 3
  %ptr2 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 5
  %val0 = load volatile float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val2 = load float, float* %ptr2
  %val3 = load float, float* %ptr3
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float %val1, i32 1
  %res2 = insertelement <4 x float> %res1, float %val2, i32 2
  %res3 = insertelement <4 x float> %res2, float %val3, i32 3
  ret <4 x float> %res3
}

;
; Non-consecutive test.
;

; Lane 0 is loaded from %ptr0, lane 1 is a constant zero, and lanes 2-3 both
; hold the single value loaded from %ptr1. The two pointers are independent
; arguments, so the checks expect two scalar loads combined with shuffles.
define <4 x float> @merge_4f32_f32_X0YY(float* %ptr0, float* %ptr1) nounwind uwtable noinline ssp {
; SSE-LABEL: merge_4f32_f32_X0YY:
; SSE:       # BB#0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: merge_4f32_f32_X0YY:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
;
; X32-SSE-LABEL: merge_4f32_f32_X0YY:
; X32-SSE:       # BB#0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0,0,1,1]
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
  %val0 = load float, float* %ptr0, align 4
  %val1 = load float, float* %ptr1, align 4
  %res0 = insertelement <4 x float> undef, float %val0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.000000e+00, i32 1
  %res2 = insertelement <4 x float> %res1, float %val1, i32 2
  %res3 = insertelement <4 x float> %res2, float %val1, i32 3
  ret <4 x float> %res3
}