; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Lanes 0-3 come from the adjacent <2 x double> loads at ptr[1] and ptr[2],
; lanes 4-5 are undef, lanes 6-7 come from ptr[4].
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 16(%rdi), %ymm0
; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %ymm0
; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %addr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %addr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %ld0 = load <2 x double>, <2 x double>* %addr0
  %ld1 = load <2 x double>, <2 x double>* %addr1
  %ld3 = load <2 x double>, <2 x double>* %addr3
  %vec01 = shufflevector <2 x double> %ld0, <2 x double> %ld1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %vec23 = shufflevector <2 x double> undef, <2 x double> %ld3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %vec = shufflevector <4 x double> %vec01, <4 x double> %vec23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %vec
}

; Lanes 0-3 come from the adjacent <2 x double> loads at ptr[2] and ptr[3],
; lanes 4-5 are zero, lanes 6-7 come from ptr[5].
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 32(%rdi), %ymm0
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 32(%eax), %ymm0
; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %addr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %addr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %ld0 = load <2 x double>, <2 x double>* %addr0
  %ld1 = load <2 x double>, <2 x double>* %addr1
  %ld3 = load <2 x double>, <2 x double>* %addr3
  %vec01 = shufflevector <2 x double> %ld0, <2 x double> %ld1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %vec23 = shufflevector <2 x double> zeroinitializer, <2 x double> %ld3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %vec = shufflevector <4 x double> %vec01, <4 x double> %vec23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %vec
}

; Low half is zero, high half is the <4 x double> load at ptr[2].
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %ld1 = load <4 x double>, <4 x double>* %addr1
  %vec = shufflevector <4 x double> zeroinitializer, <4 x double> %ld1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %vec
}

; Lanes 0, 1 and 7 are scalar loads of ptr[2], ptr[3] and ptr[9]; all other
; lanes are undef. The checks expect one full-width load at byte offset 16.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 16(%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds double, double* %ptr, i64 2
  %addr1 = getelementptr inbounds double, double* %ptr, i64 3
  %addr7 = getelementptr inbounds double, double* %ptr, i64 9
  %ld0 = load double, double* %addr0
  %ld1 = load double, double* %addr1
  %ld7 = load double, double* %addr7
  %vec0 = insertelement <8 x double> undef, double %ld0, i32 0
  %vec1 = insertelement <8 x double> %vec0, double %ld1, i32 1
  %vec7 = insertelement <8 x double> %vec1, double %ld7, i32 7
  ret <8 x double> %vec7
}

; Lanes 0-1 are ptr[1] and ptr[2], lanes 2, 3, 6 and 7 are zero, lanes 4-5 are
; undef. The checks expect a single xmm load.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds double, double* %ptr, i64 1
  %addr1 = getelementptr inbounds double, double* %ptr, i64 2
  %ld0 = load double, double* %addr0
  %ld1 = load double, double* %addr1
  %vec0 = insertelement <8 x double> undef, double %ld0, i32 0
  %vec1 = insertelement <8 x double> %vec0, double %ld1, i32 1
  %vec2 = insertelement <8 x double> %vec1, double 0.0, i32 2
  %vec3 = insertelement <8 x double> %vec2, double 0.0, i32 3
  %vec6 = insertelement <8 x double> %vec3, double 0.0, i32 6
  %vec7 = insertelement <8 x double> %vec6, double 0.0, i32 7
  ret <8 x double> %vec7
}

; Lanes 0, 2, 4 and 7 are ptr[1], ptr[3], ptr[5] and ptr[8]; lane 5 is zero and
; the rest are undef. The checks expect a zero-masked full-width load.
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movb $32, %al
; AVX512F-NEXT:    kmovw %eax, %k0
; AVX512F-NEXT:    knotw %k0, %k1
; AVX512F-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movb $32, %al
; AVX512BW-NEXT:    kmovd %eax, %k0
; AVX512BW-NEXT:    knotw %k0, %k1
; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    movb $32, %cl
; X32-AVX512F-NEXT:    kmovw %ecx, %k0
; X32-AVX512F-NEXT:    knotw %k0, %k1
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds double, double* %ptr, i64 1
  %addr2 = getelementptr inbounds double, double* %ptr, i64 3
  %addr4 = getelementptr inbounds double, double* %ptr, i64 5
  %addr7 = getelementptr inbounds double, double* %ptr, i64 8
  %ld0 = load double, double* %addr0
  %ld2 = load double, double* %addr2
  %ld4 = load double, double* %addr4
  %ld7 = load double, double* %addr7
  %vec0 = insertelement <8 x double> undef, double %ld0, i32 0
  %vec2 = insertelement <8 x double> %vec0, double %ld2, i32 2
  %vec4 = insertelement <8 x double> %vec2, double %ld4, i32 4
  %vec5 = insertelement <8 x double> %vec4, double 0.0, i32 5
  %vec7 = insertelement <8 x double> %vec5, double %ld7, i32 7
  ret <8 x double> %vec7
}

; Low half is zero, high half is the <4 x i64> load at ptr[3].
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL-NEXT:    vinsertf64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %ld1 = load <4 x i64>, <4 x i64>* %addr1
  %vec = shufflevector <4 x i64> zeroinitializer, <4 x i64> %ld1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %vec
}

; Lanes 0-1 are ptr[5] and ptr[6], lane 4 is ptr[9]; lanes 2, 3, 6 and 7 are
; zero and lane 5 is undef.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 40(%rdi), %xmm0
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 40(%eax), %xmm0
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %addr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %addr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %ld0 = load i64, i64* %addr0
  %ld1 = load i64, i64* %addr1
  %ld4 = load i64, i64* %addr4
  %vec0 = insertelement <8 x i64> undef, i64 %ld0, i32 0
  %vec1 = insertelement <8 x i64> %vec0, i64 %ld1, i32 1
  %vec2 = insertelement <8 x i64> %vec1, i64 0, i32 2
  %vec3 = insertelement <8 x i64> %vec2, i64 0, i32 3
  %vec4 = insertelement <8 x i64> %vec3, i64 %ld4, i32 4
  %vec6 = insertelement <8 x i64> %vec4, i64 0, i32 6
  %vec7 = insertelement <8 x i64> %vec6, i64 0, i32 7
  ret <8 x i64> %vec7
}

; Integer counterpart of the 1u3u5zu8 double case: lanes 0, 2, 4 and 7 are
; ptr[1], ptr[3], ptr[5] and ptr[8]; lane 5 is zero and the rest are undef.
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movb $32, %al
; AVX512F-NEXT:    kmovw %eax, %k0
; AVX512F-NEXT:    knotw %k0, %k1
; AVX512F-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movb $32, %al
; AVX512BW-NEXT:    kmovd %eax, %k0
; AVX512BW-NEXT:    knotw %k0, %k1
; AVX512BW-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    movb $32, %cl
; X32-AVX512F-NEXT:    kmovw %ecx, %k0
; X32-AVX512F-NEXT:    knotw %k0, %k1
; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %addr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %addr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %addr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %ld0 = load i64, i64* %addr0
  %ld2 = load i64, i64* %addr2
  %ld4 = load i64, i64* %addr4
  %ld7 = load i64, i64* %addr7
  %vec0 = insertelement <8 x i64> undef, i64 %ld0, i32 0
  %vec2 = insertelement <8 x i64> %vec0, i64 %ld2, i32 2
  %vec4 = insertelement <8 x i64> %vec2, i64 %ld4, i32 4
  %vec5 = insertelement <8 x i64> %vec4, i64 0, i32 5
  %vec7 = insertelement <8 x i64> %vec5, i64 %ld7, i32 7
  ret <8 x i64> %vec7
}

; Lanes 0-1 are ptr[8] and ptr[9]; lanes 2, 3, 4 and 15 are zero, the rest are
; undef. The checks expect a single 64-bit zero-extending load.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds float, float* %ptr, i64 8
  %addr1 = getelementptr inbounds float, float* %ptr, i64 9
  %ld0 = load float, float* %addr0
  %ld1 = load float, float* %addr1
  %vec0 = insertelement <16 x float> undef, float %ld0, i32 0
  %vec1 = insertelement <16 x float> %vec0, float %ld1, i32 1
  %vec2 = insertelement <16 x float> %vec1, float 0.0, i32 2
  %vec3 = insertelement <16 x float> %vec2, float 0.0, i32 3
  %vec4 = insertelement <16 x float> %vec3, float 0.0, i32 4
  %vecF = insertelement <16 x float> %vec4, float 0.0, i32 15
  ret <16 x float> %vecF
}

; Lanes 0, 1 and 3 are ptr[4], ptr[5] and ptr[7]; everything else is undef.
; The checks expect a single xmm load.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 16(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds float, float* %ptr, i64 4
  %addr1 = getelementptr inbounds float, float* %ptr, i64 5
  %addr3 = getelementptr inbounds float, float* %ptr, i64 7
  %ld0 = load float, float* %addr0
  %ld1 = load float, float* %addr1
  %ld3 = load float, float* %addr3
  %vec0 = insertelement <16 x float> undef, float %ld0, i32 0
  %vec1 = insertelement <16 x float> %vec0, float %ld1, i32 1
  %vec3 = insertelement <16 x float> %vec1, float %ld3, i32 3
  ret <16 x float> %vec3
}

; Lanes 0, 3, 12, 14 and 15 are loaded from the matching ptr[] index; all
; other lanes are undef. The checks expect one full-width load.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds float, float* %ptr, i64 0
  %addr3 = getelementptr inbounds float, float* %ptr, i64 3
  %addrC = getelementptr inbounds float, float* %ptr, i64 12
  %addrE = getelementptr inbounds float, float* %ptr, i64 14
  %addrF = getelementptr inbounds float, float* %ptr, i64 15
  %ld0 = load float, float* %addr0
  %ld3 = load float, float* %addr3
  %ldC = load float, float* %addrC
  %ldE = load float, float* %addrE
  %ldF = load float, float* %addrF
  %vec0 = insertelement <16 x float> undef, float %ld0, i32 0
  %vec3 = insertelement <16 x float> %vec0, float %ld3, i32 3
  %vecC = insertelement <16 x float> %vec3, float %ldC, i32 12
  %vecE = insertelement <16 x float> %vecC, float %ldE, i32 14
  %vecF = insertelement <16 x float> %vecE, float %ldF, i32 15
  ret <16 x float> %vecF
}

; Same loaded lanes as the previous test, but lanes 4, 5 and 13 are explicit
; zeros. The checks expect a full-width load blended with a zero vector.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups (%rdi), %zmm1
; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; ALL-NEXT:    vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm1
; X32-AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; X32-AVX512F-NEXT:    vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds float, float* %ptr, i64 0
  %addr3 = getelementptr inbounds float, float* %ptr, i64 3
  %addrC = getelementptr inbounds float, float* %ptr, i64 12
  %addrE = getelementptr inbounds float, float* %ptr, i64 14
  %addrF = getelementptr inbounds float, float* %ptr, i64 15
  %ld0 = load float, float* %addr0
  %ld3 = load float, float* %addr3
  %ldC = load float, float* %addrC
  %ldE = load float, float* %addrE
  %ldF = load float, float* %addrF
  %vec0 = insertelement <16 x float> undef, float %ld0, i32 0
  %vec3 = insertelement <16 x float> %vec0, float %ld3, i32 3
  %vec4 = insertelement <16 x float> %vec3, float 0.0, i32 4
  %vec5 = insertelement <16 x float> %vec4, float 0.0, i32 5
  %vecC = insertelement <16 x float> %vec5, float %ldC, i32 12
  %vecD = insertelement <16 x float> %vecC, float 0.0, i32 13
  %vecE = insertelement <16 x float> %vecD, float %ldE, i32 14
  %vecF = insertelement <16 x float> %vecE, float %ldF, i32 15
  ret <16 x float> %vecF
}

; Lanes 0-1 are ptr[1] and ptr[2]; lanes 2, 3, 4 and 15 are zero, the rest are
; undef. The checks expect a single 64-bit zero-extending load.
define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %addr1 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ld0 = load i32, i32* %addr0
  %ld1 = load i32, i32* %addr1
  %vec0 = insertelement <16 x i32> undef, i32 %ld0, i32 0
  %vec1 = insertelement <16 x i32> %vec0, i32 %ld1, i32 1
  %vec2 = insertelement <16 x i32> %vec1, i32 0, i32 2
  %vec3 = insertelement <16 x i32> %vec2, i32 0, i32 3
  %vec4 = insertelement <16 x i32> %vec3, i32 0, i32 4
  %vecF = insertelement <16 x i32> %vec4, i32 0, i32 15
  ret <16 x i32> %vecF
}

; Lanes 0, 1 and 3 are ptr[2], ptr[3] and ptr[5]; everything else is undef.
; The checks expect a single xmm load.
define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %addr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %addr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %ld0 = load i32, i32* %addr0
  %ld1 = load i32, i32* %addr1
  %ld3 = load i32, i32* %addr3
  %vec0 = insertelement <16 x i32> undef, i32 %ld0, i32 0
  %vec1 = insertelement <16 x i32> %vec0, i32 %ld1, i32 1
  %vec3 = insertelement <16 x i32> %vec1, i32 %ld3, i32 3
  ret <16 x i32> %vec3
}

; Lanes 0, 3, 12, 14 and 15 are loaded from the matching ptr[] index; all
; other lanes are undef. The checks expect one full-width load.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %addr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %addrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %addrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %addrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %ld0 = load i32, i32* %addr0
  %ld3 = load i32, i32* %addr3
  %ldC = load i32, i32* %addrC
  %ldE = load i32, i32* %addrE
  %ldF = load i32, i32* %addrF
  %vec0 = insertelement <16 x i32> undef, i32 %ld0, i32 0
  %vec3 = insertelement <16 x i32> %vec0, i32 %ld3, i32 3
  %vecC = insertelement <16 x i32> %vec3, i32 %ldC, i32 12
  %vecE = insertelement <16 x i32> %vecC, i32 %ldE, i32 14
  %vecF = insertelement <16 x i32> %vecE, i32 %ldF, i32 15
  ret <16 x i32> %vecF
}

; Same loaded lanes as the previous test, but lanes 4, 5 and 13 are explicit
; zeros. The checks expect a zero-masked full-width load (mask 0x2030 inverted).
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movw $8240, %ax # imm = 0x2030
; AVX512F-NEXT:    kmovw %eax, %k0
; AVX512F-NEXT:    knotw %k0, %k1
; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    movw $8240, %ax # imm = 0x2030
; AVX512BW-NEXT:    kmovd %eax, %k0
; AVX512BW-NEXT:    knotw %k0, %k1
; AVX512BW-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    movw $8240, %cx # imm = 0x2030
; X32-AVX512F-NEXT:    kmovw %ecx, %k0
; X32-AVX512F-NEXT:    knotw %k0, %k1
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} {z}
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %addr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %addrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %addrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %addrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %ld0 = load i32, i32* %addr0
  %ld3 = load i32, i32* %addr3
  %ldC = load i32, i32* %addrC
  %ldE = load i32, i32* %addrE
  %ldF = load i32, i32* %addrF
  %vec0 = insertelement <16 x i32> undef, i32 %ld0, i32 0
  %vec3 = insertelement <16 x i32> %vec0, i32 %ld3, i32 3
  %vec4 = insertelement <16 x i32> %vec3, i32 0, i32 4
  %vec5 = insertelement <16 x i32> %vec4, i32 0, i32 5
  %vecC = insertelement <16 x i32> %vec5, i32 %ldC, i32 12
  %vecD = insertelement <16 x i32> %vecC, i32 0, i32 13
  %vecE = insertelement <16 x i32> %vecD, i32 %ldE, i32 14
  %vecF = insertelement <16 x i32> %vecE, i32 %ldF, i32 15
  ret <16 x i32> %vecF
}

; Lanes 0, 1 and 3 are ptr[1], ptr[2] and ptr[4]; lanes 30-31 are zero and the
; rest are undef.
define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i16, i16* %ptr, i64 1
  %addr1 = getelementptr inbounds i16, i16* %ptr, i64 2
  %addr3 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ld0 = load i16, i16* %addr0
  %ld1 = load i16, i16* %addr1
  %ld3 = load i16, i16* %addr3
  %vec0 = insertelement <32 x i16> undef, i16 %ld0, i16 0
  %vec1 = insertelement <32 x i16> %vec0, i16 %ld1, i16 1
  %vec3 = insertelement <32 x i16> %vec1, i16 %ld3, i16 3
  %vec30 = insertelement <32 x i16> %vec3, i16 0, i16 30
  %vec31 = insertelement <32 x i16> %vec30, i16 0, i16 31
  ret <32 x i16> %vec31
}

; Lanes 0, 1 and 3 are ptr[4], ptr[5] and ptr[7]; everything else is undef.
define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %addr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %addr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %ld0 = load i16, i16* %addr0
  %ld1 = load i16, i16* %addr1
  %ld3 = load i16, i16* %addr3
  %vec0 = insertelement <32 x i16> undef, i16 %ld0, i16 0
  %vec1 = insertelement <32 x i16> %vec0, i16 %ld1, i16 1
  %vec3 = insertelement <32 x i16> %vec1, i16 %ld3, i16 3
  ret <32 x i16> %vec3
}

; Lanes 0-1 are ptr[2] and ptr[3]; lanes 3 and 14-17 are zero, the rest are
; undef.
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %addr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %ld0 = load i16, i16* %addr0
  %ld1 = load i16, i16* %addr1
  %vec0 = insertelement <32 x i16> undef, i16 %ld0, i16 0
  %vec1 = insertelement <32 x i16> %vec0, i16 %ld1, i16 1
  %vec3 = insertelement <32 x i16> %vec1, i16 0, i16 3
  %vecE = insertelement <32 x i16> %vec3, i16 0, i16 14
  %vecF = insertelement <32 x i16> %vecE, i16 0, i16 15
  %vecG = insertelement <32 x i16> %vecF, i16 0, i16 16
  %vecH = insertelement <32 x i16> %vecG, i16 0, i16 17
  ret <32 x i16> %vecH
}

; Lanes 0, 1, 3 and 7 are ptr[1], ptr[2], ptr[4] and ptr[8]; lanes 14-17 and 63
; are zero, the rest are undef.
define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %addr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %addr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %addr7 = getelementptr inbounds i8, i8* %ptr, i64 8
  %ld0 = load i8, i8* %addr0
  %ld1 = load i8, i8* %addr1
  %ld3 = load i8, i8* %addr3
  %ld7 = load i8, i8* %addr7
  %vec0 = insertelement <64 x i8> undef, i8 %ld0, i8 0
  %vec1 = insertelement <64 x i8> %vec0, i8 %ld1, i8 1
  %vec3 = insertelement <64 x i8> %vec1, i8 %ld3, i8 3
  %vec7 = insertelement <64 x i8> %vec3, i8 %ld7, i8 7
  %vec14 = insertelement <64 x i8> %vec7, i8 0, i8 14
  %vec15 = insertelement <64 x i8> %vec14, i8 0, i8 15
  %vec16 = insertelement <64 x i8> %vec15, i8 0, i8 16
  %vec17 = insertelement <64 x i8> %vec16, i8 0, i8 17
  %vec63 = insertelement <64 x i8> %vec17, i8 0, i8 63
  ret <64 x i8> %vec63
}

; Lanes 0, 1 and 3 are ptr[1], ptr[2] and ptr[4]; lanes 14-17 and 63 are zero,
; the rest are undef.
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %addr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %addr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ld0 = load i8, i8* %addr0
  %ld1 = load i8, i8* %addr1
  %ld3 = load i8, i8* %addr3
  %vec0 = insertelement <64 x i8> undef, i8 %ld0, i8 0
  %vec1 = insertelement <64 x i8> %vec0, i8 %ld1, i8 1
  %vec3 = insertelement <64 x i8> %vec1, i8 %ld3, i8 3
  %vec14 = insertelement <64 x i8> %vec3, i8 0, i8 14
  %vec15 = insertelement <64 x i8> %vec14, i8 0, i8 15
  %vec16 = insertelement <64 x i8> %vec15, i8 0, i8 16
  %vec17 = insertelement <64 x i8> %vec16, i8 0, i8 17
  %vec63 = insertelement <64 x i8> %vec17, i8 0, i8 63
  ret <64 x i8> %vec63
}

;
; consecutive loads including any/all volatiles may not be combined
;

; Same lane layout as merge_8f64_f64_23uuuuu9, but the lane-0 load is volatile;
; the checks expect separate scalar loads rather than one wide load.
define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds double, double* %ptr, i64 2
  %addr1 = getelementptr inbounds double, double* %ptr, i64 3
  %addr7 = getelementptr inbounds double, double* %ptr, i64 9
  %ld0 = load volatile double, double* %addr0
  %ld1 = load double, double* %addr1
  %ld7 = load double, double* %addr7
  %vec0 = insertelement <8 x double> undef, double %ld0, i32 0
  %vec1 = insertelement <8 x double> %vec0, double %ld1, i32 1
  %vec7 = insertelement <8 x double> %vec1, double %ld7, i32 7
  ret <8 x double> %vec7
}

; Same lane layout as merge_16i32_i32_0uu3uuuuuuuuCuEF, but every load is
; volatile; the checks expect one memory access per loaded element.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; X32-AVX512F:       # %bb.0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %addr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %addr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %addrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %addrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %addrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %ld0 = load volatile i32, i32* %addr0
  %ld3 = load volatile i32, i32* %addr3
  %ldC = load volatile i32, i32* %addrC
  %ldE = load volatile i32, i32* %addrE
  %ldF = load volatile i32, i32* %addrF
  %vec0 = insertelement <16 x i32> undef, i32 %ld0, i32 0
  %vec3 = insertelement <16 x i32> %vec0, i32 %ld3, i32 3
  %vecC = insertelement <16 x i32> %vec3, i32 %ldC, i32 12
  %vecE = insertelement <16 x i32> %vecC, i32 %ldE, i32 14
  %vecF = insertelement <16 x i32> %vecE, i32 %ldF, i32 15
  ret <16 x i32> %vecF
}