; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F

; Every function below builds a 512-bit vector from separate loads off one base
; pointer, mixed with undef and zero lanes. In the function names the trailing
; pattern lists, lane by lane, the source index that is loaded (hex digits for
; 10-15), 'u' for a lane left undef and 'z' for a lane set to zero; two
; functions deviate from this and are flagged where they occur.

; <2 x double> loads at vector indices 1, 2 and 4 form quarters 0, 1 and 3 of
; the result; quarter 2 is undef.
define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_12u4:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; <2 x double> loads at vector indices 2, 3 and 5 form quarters 0, 1 and 3 of
; the result; quarter 2 is zero.
define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_2f64_23z5:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
  %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
  %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
  %val0 = load <2 x double>, <2 x double>* %ptr0
  %val1 = load <2 x double>, <2 x double>* %ptr1
  %val3 = load <2 x double>, <2 x double>* %ptr3
  %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Low half of the result is zero; the high half is the <4 x double> loaded
; from vector index 2.
define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_4f64_z2:
; ALL:       # BB#0:
; ALL-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
  %val1 = load <4 x double>, <4 x double>* %ptr1
  %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}

; Scalar loads at indices 2, 3 and 9 fill lanes 0, 1 and 7; lanes 2-6 are
; never written and stay undef.
define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 16(%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 16(%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Scalar loads at indices 1 and 2 fill lanes 0-1; lanes 2-3 and 6-7 are zero;
; lanes 4-5 stay undef.
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %xmm0
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %xmm0
; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
  %val0 = load double, double* %ptr0
  %val1 = load double, double* %ptr1
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res2 = insertelement <8 x double> %res1, double 0.0, i32 2
  %res3 = insertelement <8 x double> %res2, double 0.0, i32 3
  %res6 = insertelement <8 x double> %res3, double 0.0, i32 6
  %res7 = insertelement <8 x double> %res6, double 0.0, i32 7
  ret <8 x double> %res7
}

; Scalar loads at indices 1, 3, 5 and 8 fill lanes 0, 2, 4 and 7; lane 5 is
; zero; lanes 1, 3 and 6 stay undef.
define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovupd 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
  %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
  %val0 = load double, double* %ptr0
  %val2 = load double, double* %ptr2
  %val4 = load double, double* %ptr4
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res2 = insertelement <8 x double> %res0, double %val2, i32 2
  %res4 = insertelement <8 x double> %res2, double %val4, i32 4
  %res5 = insertelement <8 x double> %res4, double 0.0, i32 5
  %res7 = insertelement <8 x double> %res5, double %val7, i32 7
  ret <8 x double> %res7
}

; Low half of the result is zero; the high half is the <4 x i64> loaded from
; vector index 3.
define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_4i64_z3:
; ALL:       # BB#0:
; ALL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; ALL-NEXT:    vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
  %val1 = load <4 x i64>, <4 x i64>* %ptr1
  %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}

; Scalar loads at indices 5, 6 and 9 fill lanes 0, 1 and 4; lanes 2, 3, 6 and
; 7 are zero; lane 5 stays undef.
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu 40(%rdi), %xmm0
; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu 40(%eax), %xmm0
; X32-AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
  %val0 = load i64, i64* %ptr0
  %val1 = load i64, i64* %ptr1
  %val4 = load i64, i64* %ptr4
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
  %res2 = insertelement <8 x i64> %res1, i64 0, i32 2
  %res3 = insertelement <8 x i64> %res2, i64 0, i32 3
  %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
  %res6 = insertelement <8 x i64> %res4, i64 0, i32 6
  %res7 = insertelement <8 x i64> %res6, i64 0, i32 7
  ret <8 x i64> %res7
}

; i64 counterpart of merge_8f64_f64_1u3u5zu8: loads at indices 1, 3, 5 and 8
; fill lanes 0, 2, 4 and 7; lane 5 is zero; lanes 1, 3 and 6 stay undef.
define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
; ALL-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
; X32-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
  %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
  %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
  %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
  %val0 = load i64, i64* %ptr0
  %val2 = load i64, i64* %ptr2
  %val4 = load i64, i64* %ptr4
  %val7 = load i64, i64* %ptr7
  %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
  %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
  %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
  %res5 = insertelement <8 x i64> %res4, i64 0, i32 5
  %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
  ret <8 x i64> %res7
}

; Scalar loads at indices 8 and 9 fill lanes 0-1; lanes 2, 3, 4 and 15 are
; zero; the remaining lanes stay undef.
define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res2 = insertelement <16 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <16 x float> %res2, float 0.0, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %resF = insertelement <16 x float> %res4, float 0.0, i32 15
  ret <16 x float> %resF
}

; Scalar loads at indices 4, 5 and 7 fill lanes 0, 1 and 3; every other lane
; stays undef.
define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 16(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
  %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
  %val0 = load float, float* %ptr0
  %val1 = load float, float* %ptr1
  %val3 = load float, float* %ptr3
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res1 = insertelement <16 x float> %res0, float %val1, i32 1
  %res3 = insertelement <16 x float> %res1, float %val3, i32 3
  ret <16 x float> %res3
}

; Scalar loads at indices 0, 3, 12, 14 and 15 fill the lanes of the same
; number; every other lane stays undef.
define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %resC = insertelement <16 x float> %res3, float %valC, i32 12
  %resE = insertelement <16 x float> %resC, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Scalar loads at indices 0, 3, 12, 14 and 15 fill the lanes of the same
; number; lanes 4, 5 and 13 are zero; the rest stay undef.
; NOTE(review): the pattern in the name has 'z' at lane 11 and 'u' at lane 13,
; but the IR zeroes lane 13 and never writes lane 11. The shuffle masks in the
; checks follow the IR. Confirm which one was intended before renaming or
; regenerating.
define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
  %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
  %ptrC = getelementptr inbounds float, float* %ptr, i64 12
  %ptrE = getelementptr inbounds float, float* %ptr, i64 14
  %ptrF = getelementptr inbounds float, float* %ptr, i64 15
  %val0 = load float, float* %ptr0
  %val3 = load float, float* %ptr3
  %valC = load float, float* %ptrC
  %valE = load float, float* %ptrE
  %valF = load float, float* %ptrF
  %res0 = insertelement <16 x float> undef, float %val0, i32 0
  %res3 = insertelement <16 x float> %res0, float %val3, i32 3
  %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
  %res5 = insertelement <16 x float> %res4, float 0.0, i32 5
  %resC = insertelement <16 x float> %res5, float %valC, i32 12
  %resD = insertelement <16 x float> %resC, float 0.0, i32 13
  %resE = insertelement <16 x float> %resD, float %valE, i32 14
  %resF = insertelement <16 x float> %resE, float %valF, i32 15
  ret <16 x float> %resF
}

; Scalar loads at indices 1 and 2 fill lanes 0-1; lanes 2, 3, 4 and 15 are
; zero; the remaining lanes stay undef.
define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res2 = insertelement <16 x i32> %res1, i32 0, i32 2
  %res3 = insertelement <16 x i32> %res2, i32 0, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %resF = insertelement <16 x i32> %res4, i32 0, i32 15
  ret <16 x i32> %resF
}

; Scalar loads at indices 2, 3 and 5 fill lanes 0, 1 and 3; every other lane
; stays undef.
define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovups 8(%rdi), %xmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
  %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
  %val0 = load i32, i32* %ptr0
  %val1 = load i32, i32* %ptr1
  %val3 = load i32, i32* %ptr3
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
  %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
  ret <16 x i32> %res3
}

; Scalar loads at indices 0, 3, 12, 14 and 15 fill the lanes of the same
; number; every other lane stays undef.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; Scalar loads at indices 0, 3, 12, 14 and 15 fill the lanes of the same
; number; lanes 4, 5 and 13 are zero; the rest stay undef.
; NOTE(review): the pattern in the name has 'z' at lane 11 and 'u' at lane 13,
; but the IR zeroes lane 13 and never writes lane 11. The shuffle masks in the
; checks follow the IR. Confirm which one was intended before renaming or
; regenerating.
define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; ALL:       # BB#0:
; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
; X32-AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load i32, i32* %ptr0
  %val3 = load i32, i32* %ptr3
  %valC = load i32, i32* %ptrC
  %valE = load i32, i32* %ptrE
  %valF = load i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
  %res5 = insertelement <16 x i32> %res4, i32 0, i32 5
  %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
  %resD = insertelement <16 x i32> %resC, i32 0, i32 13
  %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}

; Scalar loads at indices 1, 2 and 4 fill lanes 0, 1 and 3; lanes 30 and 31
; are zero; the rest stay undef. The two 64-bit run lines have separate check
; blocks here because their expected output differs.
define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
  %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
  ret <32 x i16> %res31
}

; Scalar loads at indices 4, 5 and 7 fill lanes 0, 1 and 3; every other lane
; stays undef.
define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; ALL:       # BB#0:
; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
  %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %val3 = load i16, i16* %ptr3
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
  ret <32 x i16> %res3
}

; Scalar loads at indices 2 and 3 fill lanes 0-1; lanes 3 and 14-17 are zero;
; the rest stay undef.
define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
  %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
  %val0 = load i16, i16* %ptr0
  %val1 = load i16, i16* %ptr1
  %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
  %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
  %res3 = insertelement <32 x i16> %res1, i16 0, i16 3
  %resE = insertelement <32 x i16> %res3, i16 0, i16 14
  %resF = insertelement <32 x i16> %resE, i16 0, i16 15
  %resG = insertelement <32 x i16> %resF, i16 0, i16 16
  %resH = insertelement <32 x i16> %resG, i16 0, i16 17
  ret <32 x i16> %resH
}

; Byte loads at indices 1, 2, 4 and 8 fill lanes 0, 1, 3 and 7; lanes 14-17
; and 63 are zero; the rest stay undef.
define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %val7 = load i8, i8* %ptr7
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res7 = insertelement <64 x i8> %res3, i8 %val7, i8 7
  %res14 = insertelement <64 x i8> %res7, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}

; Byte loads at indices 1, 2 and 4 fill lanes 0, 1 and 3; lanes 14-17 and 63
; are zero; the rest stay undef.
define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; AVX512BW:       # BB#0:
; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512BW-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
  %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
  %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
  %val0 = load i8, i8* %ptr0
  %val1 = load i8, i8* %ptr1
  %val3 = load i8, i8* %ptr3
  %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
  %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
  %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
  %res14 = insertelement <64 x i8> %res3, i8 0, i8 14
  %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
  %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
  %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
  %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
  ret <64 x i8> %res63
}

;
; consecutive loads including any/all volatiles may not be combined
;

; Same lane layout as merge_8f64_f64_23uuuuu9, but the load feeding lane 0 is
; volatile; the other two loads are ordinary.
define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
  %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
  %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
  %val0 = load volatile double, double* %ptr0
  %val1 = load double, double* %ptr1
  %val7 = load double, double* %ptr7
  %res0 = insertelement <8 x double> undef, double %val0, i32 0
  %res1 = insertelement <8 x double> %res0, double %val1, i32 1
  %res7 = insertelement <8 x double> %res1, double %val7, i32 7
  ret <8 x double> %res7
}

; Same lane layout as merge_16i32_i32_0uu3uuuuuuuuCuEF, but all five loads are
; volatile.
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; ALL:       # BB#0:
; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT:    retq
;
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
; X32-AVX512F:       # BB#0:
; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT:    retl
  %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
  %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
  %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
  %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
  %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
  %val0 = load volatile i32, i32* %ptr0
  %val3 = load volatile i32, i32* %ptr3
  %valC = load volatile i32, i32* %ptrC
  %valE = load volatile i32, i32* %ptrE
  %valF = load volatile i32, i32* %ptrF
  %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
  %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
  %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
  %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
  %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
  ret <16 x i32> %resF
}