Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
      4 ;
      5 ; Just one 32-bit run to make sure we do reasonable things.
      6 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F
      7 
      8 define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
      9 ; ALL-LABEL: merge_8f64_2f64_12u4:
     10 ; ALL:       # %bb.0:
     11 ; ALL-NEXT:    vmovups 16(%rdi), %ymm0
     12 ; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
     13 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     14 ; ALL-NEXT:    retq
     15 ;
     16 ; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
     17 ; X32-AVX512F:       # %bb.0:
     18 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     19 ; X32-AVX512F-NEXT:    vmovups 16(%eax), %ymm0
     20 ; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
     21 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     22 ; X32-AVX512F-NEXT:    retl
          ; Concatenates the 128-bit vectors ptr[1], ptr[2], undef and ptr[4]
          ; (byte offsets 16, 32 and 64) into one <8 x double>. The expected code
          ; fuses the first two into a single 256-bit vmovups and inserts ptr[4]
          ; into the upper half of the high 256 bits.
     23   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
     24   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     25   %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
     26   %val0 = load <2 x double>, <2 x double>* %ptr0
     27   %val1 = load <2 x double>, <2 x double>* %ptr1
     28   %val3 = load <2 x double>, <2 x double>* %ptr3
     29   %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     30   %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     31   %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     32   ret <8 x double> %res
     33 }
     34 
     35 define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
     36 ; ALL-LABEL: merge_8f64_2f64_23z5:
     37 ; ALL:       # %bb.0:
     38 ; ALL-NEXT:    vmovups 32(%rdi), %ymm0
     39 ; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
     40 ; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
     41 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     42 ; ALL-NEXT:    retq
     43 ;
     44 ; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
     45 ; X32-AVX512F:       # %bb.0:
     46 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     47 ; X32-AVX512F-NEXT:    vmovups 32(%eax), %ymm0
     48 ; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
     49 ; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
     50 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     51 ; X32-AVX512F-NEXT:    retl
          ; Concatenates the 128-bit vectors ptr[2], ptr[3], zero and ptr[5]
          ; (byte offsets 32, 48 and 80) into one <8 x double>. The expected code
          ; loads the first two with one 256-bit vmovups, zeroes a register with
          ; vxorps and inserts ptr[5] above the zero chunk.
     52   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     53   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
     54   %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
     55   %val0 = load <2 x double>, <2 x double>* %ptr0
     56   %val1 = load <2 x double>, <2 x double>* %ptr1
     57   %val3 = load <2 x double>, <2 x double>* %ptr3
     58   %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     59   %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     60   %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     61   ret <8 x double> %res
     62 }
     63 
     64 define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
     65 ; ALL-LABEL: merge_8f64_4f64_z2:
     66 ; ALL:       # %bb.0:
     67 ; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
     68 ; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
     69 ; ALL-NEXT:    retq
     70 ;
     71 ; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
     72 ; X32-AVX512F:       # %bb.0:
     73 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     74 ; X32-AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
     75 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
     76 ; X32-AVX512F-NEXT:    retl
          ; Places a zero <4 x double> in the low 256 bits and the vector at ptr[2]
          ; (byte offset 64) in the high 256 bits. The expected code is a vxorps
          ; followed by a vinsertf64x4 straight from memory.
     77   %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
     78   %val1 = load <4 x double>, <4 x double>* %ptr1
     79   %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     80   ret <8 x double> %res
     81 }
     82 
     83 define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
     84 ; ALL-LABEL: merge_8f64_f64_23uuuuu9:
     85 ; ALL:       # %bb.0:
     86 ; ALL-NEXT:    vmovups 16(%rdi), %zmm0
     87 ; ALL-NEXT:    retq
     88 ;
     89 ; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
     90 ; X32-AVX512F:       # %bb.0:
     91 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     92 ; X32-AVX512F-NEXT:    vmovups 16(%eax), %zmm0
     93 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1 and 7 with the doubles ptr[2], ptr[3] and ptr[9]; lanes
          ; 2-6 are left undef. Every defined lane k holds ptr[k + 2], so the checks
          ; expect one 512-bit vmovups from byte offset 16.
     94   %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
     95   %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
     96   %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
     97   %val0 = load double, double* %ptr0
     98   %val1 = load double, double* %ptr1
     99   %val7 = load double, double* %ptr7
    100   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    101   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    102   %res7 = insertelement <8 x double> %res1, double %val7, i32 7
    103   ret <8 x double> %res7
    104 }
    105 
    106 define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
    107 ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
    108 ; ALL:       # %bb.0:
    109 ; ALL-NEXT:    vmovups 8(%rdi), %xmm0
    110 ; ALL-NEXT:    retq
    111 ;
    112 ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
    113 ; X32-AVX512F:       # %bb.0:
    114 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    115 ; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
    116 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0 and 1 with ptr[1] and ptr[2], writes 0.0 to lanes 2, 3, 6
          ; and 7, and never writes lanes 4 and 5 (undef). The checks expect a lone
          ; 128-bit vmovups from byte offset 8.
          ; NOTE(review) presumably the xmm write leaves the upper lanes zero, which
          ; would cover the 0.0 lanes - confirm against the ISA reference.
    117   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
    118   %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
    119   %val0 = load double, double* %ptr0
    120   %val1 = load double, double* %ptr1
    121   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    122   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    123   %res2 = insertelement <8 x double> %res1, double   0.0, i32 2
    124   %res3 = insertelement <8 x double> %res2, double   0.0, i32 3
    125   %res6 = insertelement <8 x double> %res3, double   0.0, i32 6
    126   %res7 = insertelement <8 x double> %res6, double   0.0, i32 7
    127   ret <8 x double> %res7
    128 }
    129 
    130 define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
    131 ; AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
    132 ; AVX512F:       # %bb.0:
    133 ; AVX512F-NEXT:    movb $32, %al
    134 ; AVX512F-NEXT:    kmovw %eax, %k0
    135 ; AVX512F-NEXT:    knotw %k0, %k1
    136 ; AVX512F-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
    137 ; AVX512F-NEXT:    retq
    138 ;
    139 ; AVX512BW-LABEL: merge_8f64_f64_1u3u5zu8:
    140 ; AVX512BW:       # %bb.0:
    141 ; AVX512BW-NEXT:    movb $32, %al
    142 ; AVX512BW-NEXT:    kmovd %eax, %k0
    143 ; AVX512BW-NEXT:    knotw %k0, %k1
    144 ; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm0 {%k1} {z}
    145 ; AVX512BW-NEXT:    retq
    146 ;
    147 ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
    148 ; X32-AVX512F:       # %bb.0:
    149 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    150 ; X32-AVX512F-NEXT:    movb $32, %cl
    151 ; X32-AVX512F-NEXT:    kmovw %ecx, %k0
    152 ; X32-AVX512F-NEXT:    knotw %k0, %k1
    153 ; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0 {%k1} {z}
    154 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 2, 4 and 7 with ptr[1], ptr[3], ptr[5] and ptr[8], writes
          ; 0.0 to lane 5 and leaves lanes 1, 3 and 6 undef. The checks expect one
          ; zero-masked 512-bit vmovupd from byte offset 8 whose mask is the
          ; complement (knotw) of 32, i.e. only bit 5 is clear. The two 64-bit runs
          ; differ only in kmovw versus kmovd.
    155   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
    156   %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
    157   %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
    158   %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
    159   %val0 = load double, double* %ptr0
    160   %val2 = load double, double* %ptr2
    161   %val4 = load double, double* %ptr4
    162   %val7 = load double, double* %ptr7
    163   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    164   %res2 = insertelement <8 x double> %res0, double %val2, i32 2
    165   %res4 = insertelement <8 x double> %res2, double %val4, i32 4
    166   %res5 = insertelement <8 x double> %res4, double   0.0, i32 5
    167   %res7 = insertelement <8 x double> %res5, double %val7, i32 7
    168   ret <8 x double> %res7
    169 }
    170 
    171 define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
    172 ; ALL-LABEL: merge_8i64_4i64_z3:
    173 ; ALL:       # %bb.0:
    174 ; ALL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    175 ; ALL-NEXT:    vinsertf64x4 $1, 96(%rdi), %zmm0, %zmm0
    176 ; ALL-NEXT:    retq
    177 ;
    178 ; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
    179 ; X32-AVX512F:       # %bb.0:
    180 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    181 ; X32-AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    182 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0
    183 ; X32-AVX512F-NEXT:    retl
          ; Places a zero <4 x i64> in the low 256 bits and the vector at ptr[3]
          ; (byte offset 96) in the high 256 bits. The expected code is a vxorps
          ; followed by a vinsertf64x4 straight from memory.
    184   %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
    185   %val1 = load <4 x i64>, <4 x i64>* %ptr1
    186   %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    187   ret <8 x i64> %res
    188 }
    189 
    190 define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
    191 ; ALL-LABEL: merge_8i64_i64_56zz9uzz:
    192 ; ALL:       # %bb.0:
    193 ; ALL-NEXT:    vmovups 40(%rdi), %xmm0
    194 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    195 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    196 ; ALL-NEXT:    retq
    197 ;
    198 ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
    199 ; X32-AVX512F:       # %bb.0:
    200 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    201 ; X32-AVX512F-NEXT:    vmovups 40(%eax), %xmm0
    202 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    203 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    204 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0 and 1 with ptr[5] and ptr[6], lane 4 with ptr[9], writes 0
          ; to lanes 2, 3, 6 and 7 and leaves lane 5 undef. The checks expect a
          ; 128-bit vmovups from byte offset 40 for the low part, a vmovsd scalar
          ; load (its address is not pinned by the pattern) for the high part, and
          ; a vinsertf64x4 to join them.
    205   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
    206   %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
    207   %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
    208   %val0 = load i64, i64* %ptr0
    209   %val1 = load i64, i64* %ptr1
    210   %val4 = load i64, i64* %ptr4
    211   %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
    212   %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
    213   %res2 = insertelement <8 x i64> %res1, i64     0, i32 2
    214   %res3 = insertelement <8 x i64> %res2, i64     0, i32 3
    215   %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
    216   %res6 = insertelement <8 x i64> %res4, i64     0, i32 6
    217   %res7 = insertelement <8 x i64> %res6, i64     0, i32 7
    218   ret <8 x i64> %res7
    219 }
    220 
    221 define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
    222 ; AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
    223 ; AVX512F:       # %bb.0:
    224 ; AVX512F-NEXT:    movb $32, %al
    225 ; AVX512F-NEXT:    kmovw %eax, %k0
    226 ; AVX512F-NEXT:    knotw %k0, %k1
    227 ; AVX512F-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
    228 ; AVX512F-NEXT:    retq
    229 ;
    230 ; AVX512BW-LABEL: merge_8i64_i64_1u3u5zu8:
    231 ; AVX512BW:       # %bb.0:
    232 ; AVX512BW-NEXT:    movb $32, %al
    233 ; AVX512BW-NEXT:    kmovd %eax, %k0
    234 ; AVX512BW-NEXT:    knotw %k0, %k1
    235 ; AVX512BW-NEXT:    vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
    236 ; AVX512BW-NEXT:    retq
    237 ;
    238 ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
    239 ; X32-AVX512F:       # %bb.0:
    240 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    241 ; X32-AVX512F-NEXT:    movb $32, %cl
    242 ; X32-AVX512F-NEXT:    kmovw %ecx, %k0
    243 ; X32-AVX512F-NEXT:    knotw %k0, %k1
    244 ; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0 {%k1} {z}
    245 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 2, 4 and 7 with ptr[1], ptr[3], ptr[5] and ptr[8], writes
          ; 0 to lane 5 and leaves lanes 1, 3 and 6 undef. The checks expect one
          ; zero-masked 512-bit vmovdqu64 from byte offset 8 whose mask is the
          ; complement (knotw) of 32, i.e. only bit 5 is clear. The two 64-bit runs
          ; differ only in kmovw versus kmovd.
    246   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
    247   %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
    248   %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
    249   %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
    250   %val0 = load i64, i64* %ptr0
    251   %val2 = load i64, i64* %ptr2
    252   %val4 = load i64, i64* %ptr4
    253   %val7 = load i64, i64* %ptr7
    254   %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
    255   %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
    256   %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
    257   %res5 = insertelement <8 x i64> %res4, i64     0, i32 5
    258   %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
    259   ret <8 x i64> %res7
    260 }
    261 
    262 define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
    263 ; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
    264 ; ALL:       # %bb.0:
    265 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    266 ; ALL-NEXT:    retq
    267 ;
    268 ; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
    269 ; X32-AVX512F:       # %bb.0:
    270 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    271 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    272 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0 and 1 with ptr[8] and ptr[9], writes 0.0 to lanes 2, 3, 4
          ; and 15 and leaves lanes 5-14 undef. The checks expect a single vmovsd,
          ; i.e. one 64-bit load with the rest of the xmm register zeroed.
    273   %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
    274   %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
    275   %val0 = load float, float* %ptr0
    276   %val1 = load float, float* %ptr1
    277   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    278   %res1 = insertelement <16 x float> %res0, float %val1, i32 1
    279   %res2 = insertelement <16 x float> %res1, float   0.0, i32 2
    280   %res3 = insertelement <16 x float> %res2, float   0.0, i32 3
    281   %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
    282   %resF = insertelement <16 x float> %res4, float   0.0, i32 15
    283   ret <16 x float> %resF
    284 }
    285 
    286 define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
    287 ; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
    288 ; ALL:       # %bb.0:
    289 ; ALL-NEXT:    vmovups 16(%rdi), %xmm0
    290 ; ALL-NEXT:    retq
    291 ;
    292 ; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
    293 ; X32-AVX512F:       # %bb.0:
    294 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    295 ; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
    296 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1 and 3 with ptr[4], ptr[5] and ptr[7]; all other lanes
          ; stay undef. The checks expect one 128-bit vmovups from byte offset 16.
    297   %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
    298   %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
    299   %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
    300   %val0 = load float, float* %ptr0
    301   %val1 = load float, float* %ptr1
    302   %val3 = load float, float* %ptr3
    303   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    304   %res1 = insertelement <16 x float> %res0, float %val1, i32 1
    305   %res3 = insertelement <16 x float> %res1, float %val3, i32 3
    306   ret <16 x float> %res3
    307 }
    308 
    309 define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
    310 ; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
    311 ; ALL:       # %bb.0:
    312 ; ALL-NEXT:    vmovups (%rdi), %zmm0
    313 ; ALL-NEXT:    retq
    314 ;
    315 ; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
    316 ; X32-AVX512F:       # %bb.0:
    317 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    318 ; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
    319 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 3, 12, 14 and 15 with the floats at the same indices of
          ; ptr; all other lanes stay undef. The checks expect one 512-bit vmovups
          ; from the base pointer.
    320   %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
    321   %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
    322   %ptrC = getelementptr inbounds float, float* %ptr, i64 12
    323   %ptrE = getelementptr inbounds float, float* %ptr, i64 14
    324   %ptrF = getelementptr inbounds float, float* %ptr, i64 15
    325   %val0 = load float, float* %ptr0
    326   %val3 = load float, float* %ptr3
    327   %valC = load float, float* %ptrC
    328   %valE = load float, float* %ptrE
    329   %valF = load float, float* %ptrF
    330   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    331   %res3 = insertelement <16 x float> %res0, float %val3, i32 3
    332   %resC = insertelement <16 x float> %res3, float %valC, i32 12
    333   %resE = insertelement <16 x float> %resC, float %valE, i32 14
    334   %resF = insertelement <16 x float> %resE, float %valF, i32 15
    335   ret <16 x float> %resF
    336 }
    337 
    338 define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
    339 ; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
    340 ; ALL:       # %bb.0:
    341 ; ALL-NEXT:    vmovups (%rdi), %zmm1
    342 ; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    343 ; ALL-NEXT:    vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    344 ; ALL-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0
    345 ; ALL-NEXT:    retq
    346 ;
    347 ; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
    348 ; X32-AVX512F:       # %bb.0:
    349 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    350 ; X32-AVX512F-NEXT:    vmovups (%eax), %zmm1
    351 ; X32-AVX512F-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    352 ; X32-AVX512F-NEXT:    vmovaps {{.*#+}} zmm0 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    353 ; X32-AVX512F-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0
    354 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 3, 12, 14 and 15 with the floats at the same indices of
          ; ptr and writes 0.0 to lanes 4, 5 and 13; the rest stay undef. The checks
          ; expect a full 512-bit vmovups combined with a zeroed register through
          ; vpermi2ps; indices 20, 21 and 29 (mask lanes 4, 5 and 13) presumably
          ; pick the zeroed source.
          ; NOTE(review) the name spells "z" at lane 11 and "u" at lane 13, but the
          ; IR zeroes lane 13 and never writes lane 11.
    355   %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
    356   %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
    357   %ptrC = getelementptr inbounds float, float* %ptr, i64 12
    358   %ptrE = getelementptr inbounds float, float* %ptr, i64 14
    359   %ptrF = getelementptr inbounds float, float* %ptr, i64 15
    360   %val0 = load float, float* %ptr0
    361   %val3 = load float, float* %ptr3
    362   %valC = load float, float* %ptrC
    363   %valE = load float, float* %ptrE
    364   %valF = load float, float* %ptrF
    365   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    366   %res3 = insertelement <16 x float> %res0, float %val3, i32 3
    367   %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
    368   %res5 = insertelement <16 x float> %res4, float   0.0, i32 5
    369   %resC = insertelement <16 x float> %res5, float %valC, i32 12
    370   %resD = insertelement <16 x float> %resC, float   0.0, i32 13
    371   %resE = insertelement <16 x float> %resD, float %valE, i32 14
    372   %resF = insertelement <16 x float> %resE, float %valF, i32 15
    373   ret <16 x float> %resF
    374 }
    375 
    376 define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
    377 ; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
    378 ; ALL:       # %bb.0:
    379 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    380 ; ALL-NEXT:    retq
    381 ;
    382 ; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
    383 ; X32-AVX512F:       # %bb.0:
    384 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    385 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    386 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0 and 1 with ptr[1] and ptr[2], writes 0 to lanes 2, 3, 4
          ; and 15 and leaves lanes 5-14 undef. The checks expect a single vmovsd,
          ; i.e. one 64-bit load with the rest of the xmm register zeroed.
    387   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
    388   %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
    389   %val0 = load i32, i32* %ptr0
    390   %val1 = load i32, i32* %ptr1
    391   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    392   %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
    393   %res2 = insertelement <16 x i32> %res1, i32     0, i32 2
    394   %res3 = insertelement <16 x i32> %res2, i32     0, i32 3
    395   %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
    396   %resF = insertelement <16 x i32> %res4, i32     0, i32 15
    397   ret <16 x i32> %resF
    398 }
    399 
    400 define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
    401 ; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
    402 ; ALL:       # %bb.0:
    403 ; ALL-NEXT:    vmovups 8(%rdi), %xmm0
    404 ; ALL-NEXT:    retq
    405 ;
    406 ; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
    407 ; X32-AVX512F:       # %bb.0:
    408 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    409 ; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
    410 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1 and 3 with ptr[2], ptr[3] and ptr[5]; all other lanes
          ; stay undef. The checks expect one 128-bit vmovups from byte offset 8.
    411   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
    412   %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
    413   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
    414   %val0 = load i32, i32* %ptr0
    415   %val1 = load i32, i32* %ptr1
    416   %val3 = load i32, i32* %ptr3
    417   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    418   %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
    419   %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
    420   ret <16 x i32> %res3
    421 }
    422 
    423 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
    424 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
    425 ; ALL:       # %bb.0:
    426 ; ALL-NEXT:    vmovups (%rdi), %zmm0
    427 ; ALL-NEXT:    retq
    428 ;
    429 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
    430 ; X32-AVX512F:       # %bb.0:
    431 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    432 ; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
    433 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 3, 12, 14 and 15 with the i32 values at the same indices
          ; of ptr; all other lanes stay undef. The checks expect one 512-bit
          ; vmovups from the base pointer.
    434   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
    435   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
    436   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
    437   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
    438   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
    439   %val0 = load i32, i32* %ptr0
    440   %val3 = load i32, i32* %ptr3
    441   %valC = load i32, i32* %ptrC
    442   %valE = load i32, i32* %ptrE
    443   %valF = load i32, i32* %ptrF
    444   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    445   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    446   %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
    447   %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
    448   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    449   ret <16 x i32> %resF
    450 }
    451 
    452 define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
    453 ; AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
    454 ; AVX512F:       # %bb.0:
    455 ; AVX512F-NEXT:    movw $8240, %ax # imm = 0x2030
    456 ; AVX512F-NEXT:    kmovw %eax, %k0
    457 ; AVX512F-NEXT:    knotw %k0, %k1
    458 ; AVX512F-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
    459 ; AVX512F-NEXT:    retq
    460 ;
    461 ; AVX512BW-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
    462 ; AVX512BW:       # %bb.0:
    463 ; AVX512BW-NEXT:    movw $8240, %ax # imm = 0x2030
    464 ; AVX512BW-NEXT:    kmovd %eax, %k0
    465 ; AVX512BW-NEXT:    knotw %k0, %k1
    466 ; AVX512BW-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z}
    467 ; AVX512BW-NEXT:    retq
    468 ;
    469 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
    470 ; X32-AVX512F:       # %bb.0:
    471 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    472 ; X32-AVX512F-NEXT:    movw $8240, %cx # imm = 0x2030
    473 ; X32-AVX512F-NEXT:    kmovw %ecx, %k0
    474 ; X32-AVX512F-NEXT:    knotw %k0, %k1
    475 ; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} {z}
    476 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 3, 12, 14 and 15 with the i32 values at the same indices
          ; of ptr and writes 0 to lanes 4, 5 and 13; the rest stay undef. The checks
          ; expect one zero-masked 512-bit vmovdqu32 whose mask is the complement
          ; (knotw) of 0x2030, i.e. bits 4, 5 and 13 are clear. The two 64-bit runs
          ; differ only in kmovw versus kmovd.
          ; NOTE(review) the name spells "z" at lane 11 and "u" at lane 13, but the
          ; IR zeroes lane 13 and never writes lane 11.
    477   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
    478   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
    479   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
    480   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
    481   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
    482   %val0 = load i32, i32* %ptr0
    483   %val3 = load i32, i32* %ptr3
    484   %valC = load i32, i32* %ptrC
    485   %valE = load i32, i32* %ptrE
    486   %valF = load i32, i32* %ptrF
    487   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    488   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    489   %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
    490   %res5 = insertelement <16 x i32> %res4, i32     0, i32 5
    491   %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
    492   %resD = insertelement <16 x i32> %resC, i32     0, i32 13
    493   %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
    494   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    495   ret <16 x i32> %resF
    496 }
    497 
    498 define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
    499 ; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    500 ; AVX512F:       # %bb.0:
    501 ; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    502 ; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    503 ; AVX512F-NEXT:    retq
    504 ;
    505 ; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    506 ; AVX512BW:       # %bb.0:
    507 ; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    508 ; AVX512BW-NEXT:    retq
    509 ;
    510 ; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    511 ; X32-AVX512F:       # %bb.0:
    512 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    513 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    514 ; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    515 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1 and 3 with ptr[1], ptr[2] and ptr[4], writes 0 to lanes
          ; 30 and 31 and leaves the rest undef. Every run expects a single vmovsd
          ; load; the runs without avx512bw additionally zero xmm1.
          ; NOTE(review) presumably xmm1 holds the upper half of the result when
          ; 512-bit i16 vectors are not legal - confirm.
    516   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
    517   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
    518   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
    519   %val0 = load i16, i16* %ptr0
    520   %val1 = load i16, i16* %ptr1
    521   %val3 = load i16, i16* %ptr3
    522   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    523   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    524   %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
    525   %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
    526   %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
    527   ret <32 x i16> %res31
    528 }
    529 
    530 define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
    531 ; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    532 ; ALL:       # %bb.0:
    533 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    534 ; ALL-NEXT:    retq
    535 ;
    536 ; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    537 ; X32-AVX512F:       # %bb.0:
    538 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    539 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    540 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1 and 3 with ptr[4], ptr[5] and ptr[7]; all other lanes
          ; stay undef. Every run expects only a single vmovsd load, i.e. one 64-bit
          ; load with the rest of the xmm register zeroed.
    541   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
    542   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
    543   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
    544   %val0 = load i16, i16* %ptr0
    545   %val1 = load i16, i16* %ptr1
    546   %val3 = load i16, i16* %ptr3
    547   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    548   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    549   %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
    550   ret <32 x i16> %res3
    551 }
    552 
    553 define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
    554 ; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    555 ; AVX512F:       # %bb.0:
    556 ; AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    557 ; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    558 ; AVX512F-NEXT:    retq
    559 ;
    560 ; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    561 ; AVX512BW:       # %bb.0:
    562 ; AVX512BW-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    563 ; AVX512BW-NEXT:    retq
    564 ;
    565 ; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    566 ; X32-AVX512F:       # %bb.0:
    567 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    568 ; X32-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    569 ; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    570 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0 and 1 with ptr[2] and ptr[3], writes 0 to lanes 3, 14, 15,
          ; 16 and 17 and leaves the rest undef. Every run expects a single vmovss,
          ; i.e. one 32-bit load with the rest of the xmm register zeroed; the runs
          ; without avx512bw additionally zero xmm1.
          ; NOTE(review) presumably xmm1 holds the upper half of the result when
          ; 512-bit i16 vectors are not legal - confirm.
    571   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
    572   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
    573   %val0 = load i16, i16* %ptr0
    574   %val1 = load i16, i16* %ptr1
    575   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    576   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    577   %res3 = insertelement <32 x i16> %res1, i16     0, i16 3
    578   %resE = insertelement <32 x i16> %res3, i16     0, i16 14
    579   %resF = insertelement <32 x i16> %resE, i16     0, i16 15
    580   %resG = insertelement <32 x i16> %resF, i16     0, i16 16
    581   %resH = insertelement <32 x i16> %resG, i16     0, i16 17
    582   ret <32 x i16> %resH
    583 }
    584 
    585 define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
    586 ; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    587 ; AVX512F:       # %bb.0:
    588 ; AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    589 ; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    590 ; AVX512F-NEXT:    retq
    591 ;
    592 ; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    593 ; AVX512BW:       # %bb.0:
    594 ; AVX512BW-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    595 ; AVX512BW-NEXT:    retq
    596 ;
    597 ; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    598 ; X32-AVX512F:       # %bb.0:
    599 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    600 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    601 ; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    602 ; X32-AVX512F-NEXT:    retl
          ; Fills lanes 0, 1, 3 and 7 with ptr[1], ptr[2], ptr[4] and ptr[8], writes
          ; 0 to lanes 14, 15, 16, 17 and 63 and leaves the rest undef. Every run
          ; expects a single vmovsd load; the runs without avx512bw additionally
          ; zero xmm1.
          ; NOTE(review) presumably xmm1 holds the upper half of the result when
          ; 512-bit i8 vectors are not legal - confirm.
    603   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
    604   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
    605   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
    606   %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
    607   %val0 = load i8, i8* %ptr0
    608   %val1 = load i8, i8* %ptr1
    609   %val3 = load i8, i8* %ptr3
    610   %val7 = load i8, i8* %ptr7
    611   %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
    612   %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
    613   %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
    614   %res7  = insertelement <64 x i8> %res3,  i8 %val7, i8 7
    615   %res14 = insertelement <64 x i8> %res7,  i8     0, i8 14
    616   %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
    617   %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
    618   %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
    619   %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
    620   ret <64 x i8> %res63
    621 }
    622 
    623 define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp { ; builds a <64 x i8> from three byte loads plus five explicit zero lanes; every other lane stays undef
    624 ; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    625 ; AVX512F:       # %bb.0:
    626 ; AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    627 ; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    628 ; AVX512F-NEXT:    retq
    629 ;
    630 ; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    631 ; AVX512BW:       # %bb.0:
    632 ; AVX512BW-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    633 ; AVX512BW-NEXT:    retq
    634 ;
    635 ; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    636 ; X32-AVX512F:       # %bb.0:
    637 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    638 ; X32-AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    639 ; X32-AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    640 ; X32-AVX512F-NEXT:    retl
    641   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1 ; byte offset 1, feeds lane 0
    642   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2 ; byte offset 2, feeds lane 1
    643   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4 ; byte offset 4, feeds lane 3
    644   %val0 = load i8, i8* %ptr0
    645   %val1 = load i8, i8* %ptr1
    646   %val3 = load i8, i8* %ptr3
    647   %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0 ; start from undef so unwritten lanes remain undef
    648   %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
    649   %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3 ; lane 2 is never written
    650   %res14 = insertelement <64 x i8> %res3,  i8     0, i8 14 ; lanes 14-17 are explicit zeros (the "zzzz" in the name)
    651   %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
    652   %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
    653   %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
    654   %res63 = insertelement <64 x i8> %res17, i8     0, i8 63 ; final lane is an explicit zero (the trailing "z" in the name)
    655   ret <64 x i8> %res63
    656 }
    657 
    658 ;
    659 ; consecutive loads including any/all volatiles may not be combined
    660 ;
    661 
    662 define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp { ; builds an <8 x double> from doubles at element offsets 2, 3 and 9, where the first load is volatile
    663 ; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
    664 ; ALL:       # %bb.0:
    665 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    666 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    667 ; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
    668 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    669 ; ALL-NEXT:    retq
    670 ;
    671 ; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
    672 ; X32-AVX512F:       # %bb.0:
    673 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    674 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    675 ; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    676 ; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
    677 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    678 ; X32-AVX512F-NEXT:    retl
    679   %ptr0 = getelementptr inbounds double, double* %ptr, i64 2 ; element 2, feeds lane 0
    680   %ptr1 = getelementptr inbounds double, double* %ptr, i64 3 ; element 3, feeds lane 1 (adjacent in memory to element 2)
    681   %ptr7 = getelementptr inbounds double, double* %ptr, i64 9 ; element 9 (byte offset 72 in the expected output), feeds lane 7
    682   %val0 = load volatile double, double* %ptr0 ; the only volatile load here; the expected output keeps it as its own scalar load
    683   %val1 = load double, double* %ptr1
    684   %val7 = load double, double* %ptr7
    685   %res0 = insertelement <8 x double> undef, double %val0, i32 0 ; start from undef so lanes 2-6 remain undef
    686   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    687   %res7 = insertelement <8 x double> %res1, double %val7, i32 7
    688   ret <8 x double> %res7
    689 }
    690 
    691 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp { ; builds a <16 x i32> from five volatile i32 loads at element offsets 0, 3, 12, 14 and 15; each lane index equals its element offset
    692 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
    693 ; ALL:       # %bb.0:
    694 ; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    695 ; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
    696 ; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    697 ; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
    698 ; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
    699 ; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    700 ; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    701 ; ALL-NEXT:    retq
    702 ;
    703 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
    704 ; X32-AVX512F:       # %bb.0:
    705 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    706 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    707 ; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
    708 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    709 ; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
    710 ; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
    711 ; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    712 ; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    713 ; X32-AVX512F-NEXT:    retl
    714   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0 ; element 0, feeds lane 0
    715   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3 ; element 3 (byte offset 12 in the expected output), feeds lane 3
    716   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12 ; element 12, feeds lane 12
    717   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14 ; element 14 (byte offset 56 in the expected output), feeds lane 14
    718   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15 ; element 15 (byte offset 60 in the expected output), feeds lane 15
    719   %val0 = load volatile i32, i32* %ptr0 ; every load in this function is volatile; the expected output has five separate memory accesses
    720   %val3 = load volatile i32, i32* %ptr3
    721   %valC = load volatile i32, i32* %ptrC
    722   %valE = load volatile i32, i32* %ptrE ; adjacent in memory to element 15, yet still expected as a separate access
    723   %valF = load volatile i32, i32* %ptrF
    724   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0 ; start from undef so unwritten lanes remain undef
    725   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    726   %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
    727   %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
    728   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    729   ret <16 x i32> %resF
    730 }
    731