Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
      4 ;
      5 ; Just one 32-bit (i686) run to make sure codegen stays reasonable on a 32-bit target.
      6 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X32-AVX512F
      7 
      8 define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
        ; Concatenates the <2 x double> loads at %ptr indices 1 and 2, an undef
        ; pair, and the load at index 4 into one <8 x double>.
        ; Expected codegen: indices 1-2 come from a single 32-byte vmovupd; index 4
        ; is inserted separately with vinsertf128 before the final vinsertf64x4.
      9 ; ALL-LABEL: merge_8f64_2f64_12u4:
     10 ; ALL:       # BB#0:
     11 ; ALL-NEXT:    vmovupd 16(%rdi), %ymm0
     12 ; ALL-NEXT:    vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
     13 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     14 ; ALL-NEXT:    retq
     15 ;
     16 ; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
     17 ; X32-AVX512F:       # BB#0:
     18 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     19 ; X32-AVX512F-NEXT:    vmovupd 16(%eax), %ymm0
     20 ; X32-AVX512F-NEXT:    vinsertf128 $1, 64(%eax), %ymm0, %ymm1
     21 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     22 ; X32-AVX512F-NEXT:    retl
     23   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 1
     24   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     25   %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 4
     26   %val0 = load <2 x double>, <2 x double>* %ptr0
     27   %val1 = load <2 x double>, <2 x double>* %ptr1
     28   %val3 = load <2 x double>, <2 x double>* %ptr3
     29   %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     30   %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     31   %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     32   ret <8 x double> %res
     33 }
     34 
     35 define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
        ; Concatenates the <2 x double> loads at %ptr indices 2 and 3, a zero
        ; pair, and the load at index 5 into one <8 x double>.
        ; Expected codegen: indices 2-3 come from a single 32-byte vmovupd; index 5
        ; is inserted above a zeroed xmm1 so the third pair stays zero.
     36 ; ALL-LABEL: merge_8f64_2f64_23z5:
     37 ; ALL:       # BB#0:
     38 ; ALL-NEXT:    vmovupd 32(%rdi), %ymm0
     39 ; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
     40 ; ALL-NEXT:    vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
     41 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     42 ; ALL-NEXT:    retq
     43 ;
     44 ; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
     45 ; X32-AVX512F:       # BB#0:
     46 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     47 ; X32-AVX512F-NEXT:    vmovupd 32(%eax), %ymm0
     48 ; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
     49 ; X32-AVX512F-NEXT:    vinsertf128 $1, 80(%eax), %ymm1, %ymm1
     50 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
     51 ; X32-AVX512F-NEXT:    retl
     52   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     53   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
     54   %ptr3 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 5
     55   %val0 = load <2 x double>, <2 x double>* %ptr0
     56   %val1 = load <2 x double>, <2 x double>* %ptr1
     57   %val3 = load <2 x double>, <2 x double>* %ptr3
     58   %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     59   %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     60   %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     61   ret <8 x double> %res
     62 }
     63 
     64 define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
        ; Builds an <8 x double> whose low half is zero and whose high half is the
        ; <4 x double> load at %ptr index 2.
        ; Expected codegen: zero ymm0, then fold the load (offset 64) into
        ; vinsertf64x4.
     65 ; ALL-LABEL: merge_8f64_4f64_z2:
     66 ; ALL:       # BB#0:
     67 ; ALL-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
     68 ; ALL-NEXT:    vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
     69 ; ALL-NEXT:    retq
     70 ;
     71 ; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
     72 ; X32-AVX512F:       # BB#0:
     73 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     74 ; X32-AVX512F-NEXT:    vxorpd %ymm0, %ymm0, %ymm0
     75 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
     76 ; X32-AVX512F-NEXT:    retl
     77   %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
     78   %val1 = load <4 x double>, <4 x double>* %ptr1
     79   %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     80   ret <8 x double> %res
     81 }
     82 
     83 define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the scalar doubles at %ptr indices 2, 3 and 9 into lanes 0, 1
        ; and 7 of an otherwise-undef <8 x double>.
        ; Expected codegen: a single 64-byte vmovupd starting at offset 16.
     84 ; ALL-LABEL: merge_8f64_f64_23uuuuu9:
     85 ; ALL:       # BB#0:
     86 ; ALL-NEXT:    vmovupd 16(%rdi), %zmm0
     87 ; ALL-NEXT:    retq
     88 ;
     89 ; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
     90 ; X32-AVX512F:       # BB#0:
     91 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
     92 ; X32-AVX512F-NEXT:    vmovupd 16(%eax), %zmm0
     93 ; X32-AVX512F-NEXT:    retl
     94   %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
     95   %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
     96   %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
     97   %val0 = load double, double* %ptr0
     98   %val1 = load double, double* %ptr1
     99   %val7 = load double, double* %ptr7
    100   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    101   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    102   %res7 = insertelement <8 x double> %res1, double %val7, i32 7
    103   ret <8 x double> %res7
    104 }
    105 
    106 define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the doubles at %ptr indices 1 and 2 into lanes 0-1 and 0.0 into
        ; lanes 2, 3, 6 and 7; lanes 4-5 are left undef.
        ; Expected codegen: one 16-byte vmovupd from offset 8, widened with zeroed
        ; registers via vinsertf128 and vinsertf64x4.
    107 ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
    108 ; ALL:       # BB#0:
    109 ; ALL-NEXT:    vmovupd 8(%rdi), %xmm0
    110 ; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    111 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    112 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    113 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    114 ; ALL-NEXT:    retq
    115 ;
    116 ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
    117 ; X32-AVX512F:       # BB#0:
    118 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    119 ; X32-AVX512F-NEXT:    vmovupd 8(%eax), %xmm0
    120 ; X32-AVX512F-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    121 ; X32-AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    122 ; X32-AVX512F-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
    123 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    124 ; X32-AVX512F-NEXT:    retl
    125   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
    126   %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
    127   %val0 = load double, double* %ptr0
    128   %val1 = load double, double* %ptr1
    129   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    130   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    131   %res2 = insertelement <8 x double> %res1, double   0.0, i32 2
    132   %res3 = insertelement <8 x double> %res2, double   0.0, i32 3
    133   %res6 = insertelement <8 x double> %res3, double   0.0, i32 6
    134   %res7 = insertelement <8 x double> %res6, double   0.0, i32 7
    135   ret <8 x double> %res7
    136 }
    137 
    138 define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the doubles at %ptr indices 1, 3, 5 and 8 into lanes 0, 2, 4
        ; and 7, and 0.0 into lane 5; lanes 1, 3 and 6 are left undef.
        ; Expected codegen: one 64-byte vmovupd from offset 8, then vpermt2pd
        ; with a zeroed second source to supply the zero in lane 5.
        ; The i686 checks print the same permute mask as sixteen entries.
    139 ; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
    140 ; ALL:       # BB#0:
    141 ; ALL-NEXT:    vmovupd 8(%rdi), %zmm0
    142 ; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    143 ; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
    144 ; ALL-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    145 ; ALL-NEXT:    retq
    146 ;
    147 ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
    148 ; X32-AVX512F:       # BB#0:
    149 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    150 ; X32-AVX512F-NEXT:    vmovupd 8(%eax), %zmm0
    151 ; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    152 ; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
    153 ; X32-AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    154 ; X32-AVX512F-NEXT:    retl
    155   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
    156   %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
    157   %ptr4 = getelementptr inbounds double, double* %ptr, i64 5
    158   %ptr7 = getelementptr inbounds double, double* %ptr, i64 8
    159   %val0 = load double, double* %ptr0
    160   %val2 = load double, double* %ptr2
    161   %val4 = load double, double* %ptr4
    162   %val7 = load double, double* %ptr7
    163   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    164   %res2 = insertelement <8 x double> %res0, double %val2, i32 2
    165   %res4 = insertelement <8 x double> %res2, double %val4, i32 4
    166   %res5 = insertelement <8 x double> %res4, double   0.0, i32 5
    167   %res7 = insertelement <8 x double> %res5, double %val7, i32 7
    168   ret <8 x double> %res7
    169 }
    170 
    171 define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
        ; Builds an <8 x i64> whose low half is zero and whose high half is the
        ; <4 x i64> load at %ptr index 3.
        ; Expected codegen: zero ymm0, then fold the load (offset 96) into
        ; vinserti64x4.
    172 ; ALL-LABEL: merge_8i64_4i64_z3:
    173 ; ALL:       # BB#0:
    174 ; ALL-NEXT:    vpxor %ymm0, %ymm0, %ymm0
    175 ; ALL-NEXT:    vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
    176 ; ALL-NEXT:    retq
    177 ;
    178 ; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
    179 ; X32-AVX512F:       # BB#0:
    180 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    181 ; X32-AVX512F-NEXT:    vpxor %ymm0, %ymm0, %ymm0
    182 ; X32-AVX512F-NEXT:    vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
    183 ; X32-AVX512F-NEXT:    retl
    184   %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
    185   %val1 = load <4 x i64>, <4 x i64>* %ptr1
    186   %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    187   ret <8 x i64> %res
    188 }
    189 
    190 define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i64 values at %ptr indices 5 and 6 into lanes 0-1, the value
        ; at index 9 into lane 4, and 0 into lanes 2, 3, 6 and 7; lane 5 is undef.
        ; Expected codegen: a 16-byte vmovdqu from offset 40 widened with a zeroed
        ; xmm, plus a separate zero-extending vmovq load for the upper half.
    191 ; ALL-LABEL: merge_8i64_i64_56zz9uzz:
    192 ; ALL:       # BB#0:
    193 ; ALL-NEXT:    vmovdqu 40(%rdi), %xmm0
    194 ; ALL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    195 ; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    196 ; ALL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    197 ; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    198 ; ALL-NEXT:    retq
    199 ;
    200 ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
    201 ; X32-AVX512F:       # BB#0:
    202 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    203 ; X32-AVX512F-NEXT:    vmovdqu 40(%eax), %xmm0
    204 ; X32-AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    205 ; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    206 ; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
    207 ; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    208 ; X32-AVX512F-NEXT:    retl
    209   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
    210   %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6
    211   %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 9
    212   %val0 = load i64, i64* %ptr0
    213   %val1 = load i64, i64* %ptr1
    214   %val4 = load i64, i64* %ptr4
    215   %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
    216   %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
    217   %res2 = insertelement <8 x i64> %res1, i64     0, i32 2
    218   %res3 = insertelement <8 x i64> %res2, i64     0, i32 3
    219   %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
    220   %res6 = insertelement <8 x i64> %res4, i64     0, i32 6
    221   %res7 = insertelement <8 x i64> %res6, i64     0, i32 7
    222   ret <8 x i64> %res7
    223 }
    224 
    225 define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i64 values at %ptr indices 1, 3, 5 and 8 into lanes 0, 2, 4
        ; and 7, and 0 into lane 5; lanes 1, 3 and 6 are left undef.
        ; Expected codegen: one 64-byte vmovdqu64 from offset 8, then vpermt2q
        ; with a zeroed second source to supply the zero in lane 5.
        ; The i686 checks print the same permute mask as sixteen entries.
    226 ; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
    227 ; ALL:       # BB#0:
    228 ; ALL-NEXT:    vmovdqu64 8(%rdi), %zmm0
    229 ; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    230 ; ALL-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,u,2,u,4,13,u,7>
    231 ; ALL-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    232 ; ALL-NEXT:    retq
    233 ;
    234 ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
    235 ; X32-AVX512F:       # BB#0:
    236 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    237 ; X32-AVX512F-NEXT:    vmovdqu64 8(%eax), %zmm0
    238 ; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    239 ; X32-AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = <0,0,u,u,2,0,u,u,4,0,13,0,u,u,7,0>
    240 ; X32-AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    241 ; X32-AVX512F-NEXT:    retl
    242   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
    243   %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
    244   %ptr4 = getelementptr inbounds i64, i64* %ptr, i64 5
    245   %ptr7 = getelementptr inbounds i64, i64* %ptr, i64 8
    246   %val0 = load i64, i64* %ptr0
    247   %val2 = load i64, i64* %ptr2
    248   %val4 = load i64, i64* %ptr4
    249   %val7 = load i64, i64* %ptr7
    250   %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
    251   %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
    252   %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
    253   %res5 = insertelement <8 x i64> %res4, i64     0, i32 5
    254   %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
    255   ret <8 x i64> %res7
    256 }
    257 
    258 define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(float* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the floats at %ptr indices 8 and 9 into lanes 0-1 and 0.0 into
        ; lanes 2, 3, 4 and 15; all other lanes are left undef.
        ; Expected codegen: a single vmovsd load (8 bytes, upper part zeroed).
    259 ; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
    260 ; ALL:       # BB#0:
    261 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    262 ; ALL-NEXT:    retq
    263 ;
    264 ; X32-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
    265 ; X32-AVX512F:       # BB#0:
    266 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    267 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    268 ; X32-AVX512F-NEXT:    retl
    269   %ptr0 = getelementptr inbounds float, float* %ptr, i64 8
    270   %ptr1 = getelementptr inbounds float, float* %ptr, i64 9
    271   %val0 = load float, float* %ptr0
    272   %val1 = load float, float* %ptr1
    273   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    274   %res1 = insertelement <16 x float> %res0, float %val1, i32 1
    275   %res2 = insertelement <16 x float> %res1, float   0.0, i32 2
    276   %res3 = insertelement <16 x float> %res2, float   0.0, i32 3
    277   %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
    278   %resF = insertelement <16 x float> %res4, float   0.0, i32 15
    279   ret <16 x float> %resF
    280 }
    281 
    282 define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(float* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the floats at %ptr indices 4, 5 and 7 into lanes 0, 1 and 3 of
        ; an otherwise-undef <16 x float>.
        ; Expected codegen: a single 16-byte vmovups from offset 16.
    283 ; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
    284 ; ALL:       # BB#0:
    285 ; ALL-NEXT:    vmovups 16(%rdi), %xmm0
    286 ; ALL-NEXT:    retq
    287 ;
    288 ; X32-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
    289 ; X32-AVX512F:       # BB#0:
    290 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    291 ; X32-AVX512F-NEXT:    vmovups 16(%eax), %xmm0
    292 ; X32-AVX512F-NEXT:    retl
    293   %ptr0 = getelementptr inbounds float, float* %ptr, i64 4
    294   %ptr1 = getelementptr inbounds float, float* %ptr, i64 5
    295   %ptr3 = getelementptr inbounds float, float* %ptr, i64 7
    296   %val0 = load float, float* %ptr0
    297   %val1 = load float, float* %ptr1
    298   %val3 = load float, float* %ptr3
    299   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    300   %res1 = insertelement <16 x float> %res0, float %val1, i32 1
    301   %res3 = insertelement <16 x float> %res1, float %val3, i32 3
    302   ret <16 x float> %res3
    303 }
    304 
    305 define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(float* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the floats at %ptr indices 0, 3, 12, 14 and 15 into the
        ; same-numbered lanes of an otherwise-undef <16 x float>.
        ; Expected codegen: a single 64-byte vmovups from offset 0.
    306 ; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
    307 ; ALL:       # BB#0:
    308 ; ALL-NEXT:    vmovups (%rdi), %zmm0
    309 ; ALL-NEXT:    retq
    310 ;
    311 ; X32-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
    312 ; X32-AVX512F:       # BB#0:
    313 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    314 ; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
    315 ; X32-AVX512F-NEXT:    retl
    316   %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
    317   %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
    318   %ptrC = getelementptr inbounds float, float* %ptr, i64 12
    319   %ptrE = getelementptr inbounds float, float* %ptr, i64 14
    320   %ptrF = getelementptr inbounds float, float* %ptr, i64 15
    321   %val0 = load float, float* %ptr0
    322   %val3 = load float, float* %ptr3
    323   %valC = load float, float* %ptrC
    324   %valE = load float, float* %ptrE
    325   %valF = load float, float* %ptrF
    326   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    327   %res3 = insertelement <16 x float> %res0, float %val3, i32 3
    328   %resC = insertelement <16 x float> %res3, float %valC, i32 12
    329   %resE = insertelement <16 x float> %resC, float %valE, i32 14
    330   %resF = insertelement <16 x float> %resE, float %valF, i32 15
    331   ret <16 x float> %resF
    332 }
    333 
    334 define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(float* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the floats at %ptr indices 0, 3, 12, 14 and 15 into the
        ; same-numbered lanes and 0.0 into lanes 4, 5 and 13; the rest are undef.
        ; Expected codegen: one 64-byte vmovups from offset 0, then vpermt2ps
        ; with a zeroed second source to supply the zero lanes.
        ; NOTE(review): the pattern in the function name marks lane 11 as zero and
        ; lane 13 as undef, but the IR below zeroes lane 13 and leaves lane 11
        ; undef; the expected permute mask follows the IR, not the name.
    335 ; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
    336 ; ALL:       # BB#0:
    337 ; ALL-NEXT:    vmovups (%rdi), %zmm0
    338 ; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    339 ; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    340 ; ALL-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
    341 ; ALL-NEXT:    retq
    342 ;
    343 ; X32-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
    344 ; X32-AVX512F:       # BB#0:
    345 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    346 ; X32-AVX512F-NEXT:    vmovups (%eax), %zmm0
    347 ; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    348 ; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    349 ; X32-AVX512F-NEXT:    vpermt2ps %zmm1, %zmm2, %zmm0
    350 ; X32-AVX512F-NEXT:    retl
    351   %ptr0 = getelementptr inbounds float, float* %ptr, i64 0
    352   %ptr3 = getelementptr inbounds float, float* %ptr, i64 3
    353   %ptrC = getelementptr inbounds float, float* %ptr, i64 12
    354   %ptrE = getelementptr inbounds float, float* %ptr, i64 14
    355   %ptrF = getelementptr inbounds float, float* %ptr, i64 15
    356   %val0 = load float, float* %ptr0
    357   %val3 = load float, float* %ptr3
    358   %valC = load float, float* %ptrC
    359   %valE = load float, float* %ptrE
    360   %valF = load float, float* %ptrF
    361   %res0 = insertelement <16 x float> undef, float %val0, i32 0
    362   %res3 = insertelement <16 x float> %res0, float %val3, i32 3
    363   %res4 = insertelement <16 x float> %res3, float   0.0, i32 4
    364   %res5 = insertelement <16 x float> %res4, float   0.0, i32 5
    365   %resC = insertelement <16 x float> %res5, float %valC, i32 12
    366   %resD = insertelement <16 x float> %resC, float   0.0, i32 13
    367   %resE = insertelement <16 x float> %resD, float %valE, i32 14
    368   %resF = insertelement <16 x float> %resE, float %valF, i32 15
    369   ret <16 x float> %resF
    370 }
    371 
    372 define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(i32* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i32 values at %ptr indices 1 and 2 into lanes 0-1 and 0 into
        ; lanes 2, 3, 4 and 15; all other lanes are left undef.
        ; Expected codegen: a single vmovq load (8 bytes, upper part zeroed).
    373 ; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
    374 ; ALL:       # BB#0:
    375 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    376 ; ALL-NEXT:    retq
    377 ;
    378 ; X32-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
    379 ; X32-AVX512F:       # BB#0:
    380 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    381 ; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    382 ; X32-AVX512F-NEXT:    retl
    383   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
    384   %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 2
    385   %val0 = load i32, i32* %ptr0
    386   %val1 = load i32, i32* %ptr1
    387   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    388   %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
    389   %res2 = insertelement <16 x i32> %res1, i32     0, i32 2
    390   %res3 = insertelement <16 x i32> %res2, i32     0, i32 3
    391   %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
    392   %resF = insertelement <16 x i32> %res4, i32     0, i32 15
    393   ret <16 x i32> %resF
    394 }
    395 
    396 define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i32 values at %ptr indices 2, 3 and 5 into lanes 0, 1 and 3
        ; of an otherwise-undef <16 x i32>.
        ; Expected codegen: a single 16-byte vmovups from offset 8.
    397 ; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
    398 ; ALL:       # BB#0:
    399 ; ALL-NEXT:    vmovups 8(%rdi), %xmm0
    400 ; ALL-NEXT:    retq
    401 ;
    402 ; X32-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
    403 ; X32-AVX512F:       # BB#0:
    404 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    405 ; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
    406 ; X32-AVX512F-NEXT:    retl
    407   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 2
    408   %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 3
    409   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 5
    410   %val0 = load i32, i32* %ptr0
    411   %val1 = load i32, i32* %ptr1
    412   %val3 = load i32, i32* %ptr3
    413   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    414   %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
    415   %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
    416   ret <16 x i32> %res3
    417 }
    418 
    419 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i32 values at %ptr indices 0, 3, 12, 14 and 15 into the
        ; same-numbered lanes of an otherwise-undef <16 x i32>.
        ; Expected codegen: a single 64-byte vmovdqu32 from offset 0.
    420 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
    421 ; ALL:       # BB#0:
    422 ; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
    423 ; ALL-NEXT:    retq
    424 ;
    425 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
    426 ; X32-AVX512F:       # BB#0:
    427 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    428 ; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
    429 ; X32-AVX512F-NEXT:    retl
    430   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
    431   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
    432   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
    433   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
    434   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
    435   %val0 = load i32, i32* %ptr0
    436   %val3 = load i32, i32* %ptr3
    437   %valC = load i32, i32* %ptrC
    438   %valE = load i32, i32* %ptrE
    439   %valF = load i32, i32* %ptrF
    440   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    441   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    442   %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
    443   %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
    444   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    445   ret <16 x i32> %resF
    446 }
    447 
    448 define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i32 values at %ptr indices 0, 3, 12, 14 and 15 into the
        ; same-numbered lanes and 0 into lanes 4, 5 and 13; the rest are undef.
        ; Expected codegen: one 64-byte vmovdqu32 from offset 0, then vpermt2d
        ; with a zeroed second source to supply the zero lanes.
        ; NOTE(review): the pattern in the function name marks lane 11 as zero and
        ; lane 13 as undef, but the IR below zeroes lane 13 and leaves lane 11
        ; undef; the expected permute mask follows the IR, not the name.
    449 ; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
    450 ; ALL:       # BB#0:
    451 ; ALL-NEXT:    vmovdqu32 (%rdi), %zmm0
    452 ; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    453 ; ALL-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    454 ; ALL-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
    455 ; ALL-NEXT:    retq
    456 ;
    457 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
    458 ; X32-AVX512F:       # BB#0:
    459 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    460 ; X32-AVX512F-NEXT:    vmovdqu32 (%eax), %zmm0
    461 ; X32-AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    462 ; X32-AVX512F-NEXT:    vmovdqa32 {{.*#+}} zmm2 = <0,u,u,3,20,21,u,u,u,u,u,u,12,29,14,15>
    463 ; X32-AVX512F-NEXT:    vpermt2d %zmm1, %zmm2, %zmm0
    464 ; X32-AVX512F-NEXT:    retl
    465   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
    466   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
    467   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
    468   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
    469   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
    470   %val0 = load i32, i32* %ptr0
    471   %val3 = load i32, i32* %ptr3
    472   %valC = load i32, i32* %ptrC
    473   %valE = load i32, i32* %ptrE
    474   %valF = load i32, i32* %ptrF
    475   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    476   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    477   %res4 = insertelement <16 x i32> %res3, i32     0, i32 4
    478   %res5 = insertelement <16 x i32> %res4, i32     0, i32 5
    479   %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
    480   %resD = insertelement <16 x i32> %resC, i32     0, i32 13
    481   %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
    482   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    483   ret <16 x i32> %resF
    484 }
    485 
    486 define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(i16* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i16 values at %ptr indices 1, 2 and 4 into lanes 0, 1 and 3
        ; and 0 into lanes 30 and 31; all other lanes are left undef.
        ; Expected codegen: a single vmovq load. The avx512f-only runs (64-bit and
        ; i686) additionally zero ymm1; the +avx512bw run does not.
        ; NOTE(review): presumably the result is split across two ymm registers
        ; without avx512bw - confirm against the calling convention.
    487 ; AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    488 ; AVX512F:       # BB#0:
    489 ; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    490 ; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    491 ; AVX512F-NEXT:    retq
    492 ;
    493 ; AVX512BW-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    494 ; AVX512BW:       # BB#0:
    495 ; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    496 ; AVX512BW-NEXT:    retq
    497 ;
    498 ; X32-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
    499 ; X32-AVX512F:       # BB#0:
    500 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    501 ; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    502 ; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    503 ; X32-AVX512F-NEXT:    retl
    504   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 1
    505   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 2
    506   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 4
    507   %val0 = load i16, i16* %ptr0
    508   %val1 = load i16, i16* %ptr1
    509   %val3 = load i16, i16* %ptr3
    510   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    511   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    512   %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
    513   %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
    514   %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
    515   ret <32 x i16> %res31
    516 }
    517 
    518 define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i16 values at %ptr indices 4, 5 and 7 into lanes 0, 1 and 3
        ; of an otherwise-undef <32 x i16>.
        ; Expected codegen: a single vmovq load in every run.
    519 ; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    520 ; ALL:       # BB#0:
    521 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    522 ; ALL-NEXT:    retq
    523 ;
    524 ; X32-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    525 ; X32-AVX512F:       # BB#0:
    526 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    527 ; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    528 ; X32-AVX512F-NEXT:    retl
    529   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
    530   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
    531   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
    532   %val0 = load i16, i16* %ptr0
    533   %val1 = load i16, i16* %ptr1
    534   %val3 = load i16, i16* %ptr3
    535   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    536   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    537   %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
    538   ret <32 x i16> %res3
    539 }
    540 
    541 define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the i16 values at %ptr indices 2 and 3 into lanes 0-1 and 0 into
        ; lanes 3, 14, 15, 16 and 17; all other lanes are left undef.
        ; Expected codegen: a single vmovd load. The avx512f-only runs (64-bit and
        ; i686) additionally zero ymm1; the +avx512bw run does not.
    542 ; AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    543 ; AVX512F:       # BB#0:
    544 ; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    545 ; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    546 ; AVX512F-NEXT:    retq
    547 ;
    548 ; AVX512BW-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    549 ; AVX512BW:       # BB#0:
    550 ; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    551 ; AVX512BW-NEXT:    retq
    552 ;
    553 ; X32-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    554 ; X32-AVX512F:       # BB#0:
    555 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    556 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    557 ; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    558 ; X32-AVX512F-NEXT:    retl
    559   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 2
    560   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 3
    561   %val0 = load i16, i16* %ptr0
    562   %val1 = load i16, i16* %ptr1
    563   %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
    564   %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
    565   %res3 = insertelement <32 x i16> %res1, i16     0, i16 3
    566   %resE = insertelement <32 x i16> %res3, i16     0, i16 14
    567   %resF = insertelement <32 x i16> %resE, i16     0, i16 15
    568   %resG = insertelement <32 x i16> %resF, i16     0, i16 16
    569   %resH = insertelement <32 x i16> %resG, i16     0, i16 17
    570   ret <32 x i16> %resH
    571 }
    572 
    573 define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
        ; Inserts the bytes at %ptr indices 1, 2, 4 and 8 into lanes 0, 1, 3 and 7
        ; and 0 into lanes 14, 15, 16, 17 and 63; all other lanes are left undef.
        ; Expected codegen: a single vmovq load. The avx512f-only runs (64-bit and
        ; i686) additionally zero ymm1; the +avx512bw run does not.
    574 ; AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    575 ; AVX512F:       # BB#0:
    576 ; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    577 ; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    578 ; AVX512F-NEXT:    retq
    579 ;
    580 ; AVX512BW-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    581 ; AVX512BW:       # BB#0:
    582 ; AVX512BW-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    583 ; AVX512BW-NEXT:    retq
    584 ;
    585 ; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    586 ; X32-AVX512F:       # BB#0:
    587 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    588 ; X32-AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    589 ; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    590 ; X32-AVX512F-NEXT:    retl
    591   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
    592   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
    593   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
    594   %ptr7 = getelementptr inbounds i8, i8* %ptr, i64 8
    595   %val0 = load i8, i8* %ptr0
    596   %val1 = load i8, i8* %ptr1
    597   %val3 = load i8, i8* %ptr3
    598   %val7 = load i8, i8* %ptr7
    599   %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
    600   %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
    601   %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
    602   %res7  = insertelement <64 x i8> %res3,  i8 %val7, i8 7
    603   %res14 = insertelement <64 x i8> %res7,  i8     0, i8 14
    604   %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
    605   %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
    606   %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
    607   %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
    608   ret <64 x i8> %res63
    609 }
    610 
    611 define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(i8* %ptr) nounwind uwtable noinline ssp {
        ; Builds a <64 x i8> from three scalar i8 loads (byte offsets 1, 2 and 4
        ; from %ptr) placed in elements 0, 1 and 3, with explicit zeros in
        ; elements 14-17 and 63; every other element is left undef.
        ; The function name spells out this per-element pattern: a digit is the
        ; byte offset that element is loaded from, 'u' is undef, 'z' is zero.
        ; Expected codegen below: the three byte loads become one 32-bit vmovd
        ; whose remaining lanes are zero. The runs with only +avx512f additionally
        ; zero ymm1 -- presumably because the 64 x i8 result is returned as two
        ; ymm halves when +avx512bw is unavailable (TODO confirm).
    612 ; AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    613 ; AVX512F:       # BB#0:
    614 ; AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    615 ; AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    616 ; AVX512F-NEXT:    retq
    617 ;
    618 ; AVX512BW-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    619 ; AVX512BW:       # BB#0:
    620 ; AVX512BW-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    621 ; AVX512BW-NEXT:    retq
    622 ;
    623 ; X32-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
    624 ; X32-AVX512F:       # BB#0:
    625 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    626 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    627 ; X32-AVX512F-NEXT:    vxorps %ymm1, %ymm1, %ymm1
    628 ; X32-AVX512F-NEXT:    retl
          ; Addresses of the three source bytes; the numeric suffix of each value
          ; name is the destination element index, not the byte offset.
    629   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 1
    630   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 2
    631   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 4
          ; Plain (non-volatile) scalar loads.
    632   %val0 = load i8, i8* %ptr0
    633   %val1 = load i8, i8* %ptr1
    634   %val3 = load i8, i8* %ptr3
          ; Insert the loaded bytes at elements 0, 1 and 3.
    635   %res0  = insertelement <64 x i8> undef,  i8 %val0, i8 0
    636   %res1  = insertelement <64 x i8> %res0,  i8 %val1, i8 1
    637   %res3  = insertelement <64 x i8> %res1,  i8 %val3, i8 3
          ; Explicit zero elements: 14-17 and the last element, 63.
    638   %res14 = insertelement <64 x i8> %res3,  i8     0, i8 14
    639   %res15 = insertelement <64 x i8> %res14, i8     0, i8 15
    640   %res16 = insertelement <64 x i8> %res15, i8     0, i8 16
    641   %res17 = insertelement <64 x i8> %res16, i8     0, i8 17
    642   %res63 = insertelement <64 x i8> %res17, i8     0, i8 63
    643   ret <64 x i8> %res63
    644 }
    645 
    646 ;
    647 ; Consecutive loads must not be combined when any of them (one or all) is volatile.
    648 ;
    649 
    650 define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(double* %ptr) nounwind uwtable noinline ssp {
        ; Builds an <8 x double> from three scalar double loads (element offsets 2,
        ; 3 and 9 from %ptr) placed in elements 0, 1 and 7; elements 2-6 stay undef.
        ; Only the first load (offset 2) is volatile.
        ; Expected codegen below: although offsets 2 and 3 are adjacent in memory,
        ; they are loaded separately (vmovsd for the low half, then vmovhpd for the
        ; high half) instead of with one vector load. Element 7 is produced by a
        ; vbroadcastsd from byte offset 72 (9 * 8) into the upper ymm half.
    651 ; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
    652 ; ALL:       # BB#0:
    653 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    654 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    655 ; ALL-NEXT:    vbroadcastsd 72(%rdi), %ymm1
    656 ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    657 ; ALL-NEXT:    retq
    658 ;
    659 ; X32-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
    660 ; X32-AVX512F:       # BB#0:
    661 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    662 ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    663 ; X32-AVX512F-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    664 ; X32-AVX512F-NEXT:    vbroadcastsd 72(%eax), %ymm1
    665 ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
    666 ; X32-AVX512F-NEXT:    retl
          ; Addresses of the three source doubles; the numeric suffix of each value
          ; name is the destination element index, not the source offset.
    667   %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
    668   %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
    669   %ptr7 = getelementptr inbounds double, double* %ptr, i64 9
          ; The volatile qualifier on %val0 is what this test exercises; the other
          ; two loads are ordinary.
    670   %val0 = load volatile double, double* %ptr0
    671   %val1 = load double, double* %ptr1
    672   %val7 = load double, double* %ptr7
          ; Insert the loaded values at elements 0, 1 and 7.
    673   %res0 = insertelement <8 x double> undef, double %val0, i32 0
    674   %res1 = insertelement <8 x double> %res0, double %val1, i32 1
    675   %res7 = insertelement <8 x double> %res1, double %val7, i32 7
    676   ret <8 x double> %res7
    677 }
    678 
    679 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(i32* %ptr) nounwind uwtable noinline ssp {
        ; Builds a <16 x i32> from five scalar i32 loads (element offsets 0, 3, 12,
        ; 14 and 15 from %ptr) placed in the same-numbered vector elements; all
        ; other elements stay undef. Every one of the five loads is volatile.
        ; The hex digits in the function name (0, 3, C, E, F) are those offsets;
        ; 'u' marks an undef element.
        ; Expected codegen below: each value is loaded individually (vmovd for the
        ; first element of each 128-bit group, vpinsrd for the rest, at byte
        ; offsets 12, 56 and 60) and the pieces are then assembled with
        ; vinserti128 / vinserti64x4; no wider vector load is formed.
    680 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
    681 ; ALL:       # BB#0:
    682 ; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    683 ; ALL-NEXT:    vpinsrd $3, 12(%rdi), %xmm0, %xmm0
    684 ; ALL-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    685 ; ALL-NEXT:    vpinsrd $2, 56(%rdi), %xmm1, %xmm1
    686 ; ALL-NEXT:    vpinsrd $3, 60(%rdi), %xmm1, %xmm1
    687 ; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    688 ; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    689 ; ALL-NEXT:    retq
    690 ;
    691 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
    692 ; X32-AVX512F:       # BB#0:
    693 ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
    694 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
    695 ; X32-AVX512F-NEXT:    vpinsrd $3, 12(%eax), %xmm0, %xmm0
    696 ; X32-AVX512F-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
    697 ; X32-AVX512F-NEXT:    vpinsrd $2, 56(%eax), %xmm1, %xmm1
    698 ; X32-AVX512F-NEXT:    vpinsrd $3, 60(%eax), %xmm1, %xmm1
    699 ; X32-AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
    700 ; X32-AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    701 ; X32-AVX512F-NEXT:    retl
          ; Addresses of the five source i32 values; here the value-name suffix is
          ; both the source offset and the destination element index (in hex).
    702   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
    703   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
    704   %ptrC = getelementptr inbounds i32, i32* %ptr, i64 12
    705   %ptrE = getelementptr inbounds i32, i32* %ptr, i64 14
    706   %ptrF = getelementptr inbounds i32, i32* %ptr, i64 15
          ; All loads are volatile, including the adjacent pair at offsets 14 and 15.
    707   %val0 = load volatile i32, i32* %ptr0
    708   %val3 = load volatile i32, i32* %ptr3
    709   %valC = load volatile i32, i32* %ptrC
    710   %valE = load volatile i32, i32* %ptrE
    711   %valF = load volatile i32, i32* %ptrF
          ; Insert the loaded values at elements 0, 3, 12, 14 and 15.
    712   %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
    713   %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
    714   %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
    715   %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
    716   %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
    717   ret <16 x i32> %resF
    718 }
    719