Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512F
      5 ;
      6 ; Just one 32-bit run to make sure we do reasonable things.
      7 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
      8 
      9 define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noinline ssp {
     10 ; AVX-LABEL: merge_4f64_2f64_23:
     11 ; AVX:       # %bb.0:
     12 ; AVX-NEXT:    vmovups 32(%rdi), %ymm0
     13 ; AVX-NEXT:    retq
     14 ;
     15 ; X32-AVX-LABEL: merge_4f64_2f64_23:
     16 ; X32-AVX:       # %bb.0:
     17 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
     18 ; X32-AVX-NEXT:    vmovups 32(%eax), %ymm0
     19 ; X32-AVX-NEXT:    retl
          ; Concatenate the <2 x double> values at element indices 2 and 3 of %ptr
          ; into one <4 x double>. The assertions above expect the two loads to be
          ; folded into a single 32-byte vmovups at byte offset 32.
     20   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     21   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 3
     22   %val0 = load <2 x double>, <2 x double>* %ptr0
     23   %val1 = load <2 x double>, <2 x double>* %ptr1
     24   %res = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     25   ret <4 x double> %res
     26 }
     27 
     28 define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
     29 ; AVX-LABEL: merge_4f64_2f64_2z:
     30 ; AVX:       # %bb.0:
     31 ; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
     32 ; AVX-NEXT:    retq
     33 ;
     34 ; X32-AVX-LABEL: merge_4f64_2f64_2z:
     35 ; X32-AVX:       # %bb.0:
     36 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
     37 ; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
     38 ; X32-AVX-NEXT:    retl
          ; Widen the <2 x double> at element index 2 of %ptr to <4 x double> with a
          ; zero upper half. The assertions above expect only a 16-byte vmovaps at
          ; byte offset 32, with no separate zeroing or insert instruction.
     39   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
     40   %val0 = load <2 x double>, <2 x double>* %ptr0
     41   %res = shufflevector <2 x double> %val0, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     42   ret <4 x double> %res
     43 }
     44 
     45 define <4 x double> @merge_4f64_f64_2345(double* %ptr) nounwind uwtable noinline ssp {
     46 ; AVX-LABEL: merge_4f64_f64_2345:
     47 ; AVX:       # %bb.0:
     48 ; AVX-NEXT:    vmovups 16(%rdi), %ymm0
     49 ; AVX-NEXT:    retq
     50 ;
     51 ; X32-AVX-LABEL: merge_4f64_f64_2345:
     52 ; X32-AVX:       # %bb.0:
     53 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
     54 ; X32-AVX-NEXT:    vmovups 16(%eax), %ymm0
     55 ; X32-AVX-NEXT:    retl
          ; Build a <4 x double> from the four consecutive scalars at element indices
          ; 2..5 of %ptr, one insertelement per lane. The assertions above expect the
          ; four scalar loads to become a single 32-byte vmovups at byte offset 16.
     56   %ptr0 = getelementptr inbounds double, double* %ptr, i64 2
     57   %ptr1 = getelementptr inbounds double, double* %ptr, i64 3
     58   %ptr2 = getelementptr inbounds double, double* %ptr, i64 4
     59   %ptr3 = getelementptr inbounds double, double* %ptr, i64 5
     60   %val0 = load double, double* %ptr0
     61   %val1 = load double, double* %ptr1
     62   %val2 = load double, double* %ptr2
     63   %val3 = load double, double* %ptr3
     64   %res0 = insertelement <4 x double> undef, double %val0, i32 0
     65   %res1 = insertelement <4 x double> %res0, double %val1, i32 1
     66   %res2 = insertelement <4 x double> %res1, double %val2, i32 2
     67   %res3 = insertelement <4 x double> %res2, double %val3, i32 3
     68   ret <4 x double> %res3
     69 }
     70 
     71 define <4 x double> @merge_4f64_f64_3zuu(double* %ptr) nounwind uwtable noinline ssp {
     72 ; AVX-LABEL: merge_4f64_f64_3zuu:
     73 ; AVX:       # %bb.0:
     74 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     75 ; AVX-NEXT:    retq
     76 ;
     77 ; X32-AVX-LABEL: merge_4f64_f64_3zuu:
     78 ; X32-AVX:       # %bb.0:
     79 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
     80 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     81 ; X32-AVX-NEXT:    retl
          ; Lane 0 is the scalar at element index 3 of %ptr, lane 1 is 0.0, and lanes
          ; 2-3 are left undef. The assertions above expect a single vmovsd whose
          ; annotated result is "mem[0],zero".
     82   %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
     83   %val0 = load double, double* %ptr0
     84   %res0 = insertelement <4 x double> undef, double %val0, i32 0
     85   %res1 = insertelement <4 x double> %res0, double 0.0, i32 1
     86   ret <4 x double> %res1
     87 }
     88 
     89 define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline ssp {
     90 ; AVX-LABEL: merge_4f64_f64_34uu:
     91 ; AVX:       # %bb.0:
     92 ; AVX-NEXT:    vmovups 24(%rdi), %xmm0
     93 ; AVX-NEXT:    retq
     94 ;
     95 ; X32-AVX-LABEL: merge_4f64_f64_34uu:
     96 ; X32-AVX:       # %bb.0:
     97 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
     98 ; X32-AVX-NEXT:    vmovups 24(%eax), %xmm0
     99 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the scalars at element indices 3 and 4 of %ptr; lanes 2-3
          ; are left undef. The assertions above expect one 16-byte vmovups at byte
          ; offset 24.
    100   %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
    101   %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
    102   %val0 = load double, double* %ptr0
    103   %val1 = load double, double* %ptr1
    104   %res0 = insertelement <4 x double> undef, double %val0, i32 0
    105   %res1 = insertelement <4 x double> %res0, double %val1, i32 1
    106   ret <4 x double> %res1
    107 }
    108 
    109 define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
    110 ; AVX-LABEL: merge_4f64_f64_45zz:
    111 ; AVX:       # %bb.0:
    112 ; AVX-NEXT:    vmovups 32(%rdi), %xmm0
    113 ; AVX-NEXT:    retq
    114 ;
    115 ; X32-AVX-LABEL: merge_4f64_f64_45zz:
    116 ; X32-AVX:       # %bb.0:
    117 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    118 ; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
    119 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the scalars at element indices 4 and 5 of %ptr, inserted
          ; into a zeroinitializer vector so lanes 2-3 are zero. The assertions above
          ; expect only a 16-byte vmovups at byte offset 32.
    120   %ptr0 = getelementptr inbounds double, double* %ptr, i64 4
    121   %ptr1 = getelementptr inbounds double, double* %ptr, i64 5
    122   %val0 = load double, double* %ptr0
    123   %val1 = load double, double* %ptr1
    124   %res0 = insertelement <4 x double> zeroinitializer, double %val0, i32 0
    125   %res1 = insertelement <4 x double> %res0, double %val1, i32 1
    126   ret <4 x double> %res1
    127 }
    128 
    129 define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline ssp {
    130 ; AVX-LABEL: merge_4f64_f64_34z6:
    131 ; AVX:       # %bb.0:
    132 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    133 ; AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
    134 ; AVX-NEXT:    retq
    135 ;
    136 ; X32-AVX-LABEL: merge_4f64_f64_34z6:
    137 ; X32-AVX:       # %bb.0:
    138 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    139 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    140 ; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
    141 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 1 and 3 are the scalars at element indices 3, 4 and 6 of %ptr;
          ; lane 2 is the constant 0.0. The assertions above expect a zeroed register
          ; (vxorps) blended with a memory operand by vblendps, with the zero
          ; register supplying only the middle float positions [4,5].
    142   %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
    143   %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
    144   %ptr3 = getelementptr inbounds double, double* %ptr, i64 6
    145   %val0 = load double, double* %ptr0
    146   %val1 = load double, double* %ptr1
    147   %val3 = load double, double* %ptr3
    148   %res0 = insertelement <4 x double> undef, double %val0, i32 0
    149   %res1 = insertelement <4 x double> %res0, double %val1, i32 1
    150   %res2 = insertelement <4 x double> %res1, double   0.0, i32 2
    151   %res3 = insertelement <4 x double> %res2, double %val3, i32 3
    152   ret <4 x double> %res3
    153 }
    154 
    155 define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
    156 ; AVX-LABEL: merge_4i64_2i64_3z:
    157 ; AVX:       # %bb.0:
    158 ; AVX-NEXT:    vmovaps 48(%rdi), %xmm0
    159 ; AVX-NEXT:    retq
    160 ;
    161 ; X32-AVX-LABEL: merge_4i64_2i64_3z:
    162 ; X32-AVX:       # %bb.0:
    163 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    164 ; X32-AVX-NEXT:    vmovaps 48(%eax), %xmm0
    165 ; X32-AVX-NEXT:    retl
          ; Widen the <2 x i64> at element index 3 of %ptr to <4 x i64> with a zero
          ; upper half. The assertions above expect only a 16-byte vmovaps at byte
          ; offset 48.
    166   %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
    167   %val0 = load <2 x i64>, <2 x i64>* %ptr0
    168   %res = shufflevector <2 x i64> %val0, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    169   ret <4 x i64> %res
    170 }
    171 
    172 define <4 x i64> @merge_4i64_i64_1234(i64* %ptr) nounwind uwtable noinline ssp {
    173 ; AVX-LABEL: merge_4i64_i64_1234:
    174 ; AVX:       # %bb.0:
    175 ; AVX-NEXT:    vmovups 8(%rdi), %ymm0
    176 ; AVX-NEXT:    retq
    177 ;
    178 ; X32-AVX-LABEL: merge_4i64_i64_1234:
    179 ; X32-AVX:       # %bb.0:
    180 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    181 ; X32-AVX-NEXT:    vmovups 8(%eax), %ymm0
    182 ; X32-AVX-NEXT:    retl
          ; Build a <4 x i64> from the four consecutive i64 values at element indices
          ; 1..4 of %ptr. The assertions above expect a single 32-byte vmovups at
          ; byte offset 8.
    183   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
    184   %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 2
    185   %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
    186   %ptr3 = getelementptr inbounds i64, i64* %ptr, i64 4
    187   %val0 = load i64, i64* %ptr0
    188   %val1 = load i64, i64* %ptr1
    189   %val2 = load i64, i64* %ptr2
    190   %val3 = load i64, i64* %ptr3
    191   %res0 = insertelement <4 x i64> undef, i64 %val0, i32 0
    192   %res1 = insertelement <4 x i64> %res0, i64 %val1, i32 1
    193   %res2 = insertelement <4 x i64> %res1, i64 %val2, i32 2
    194   %res3 = insertelement <4 x i64> %res2, i64 %val3, i32 3
    195   ret <4 x i64> %res3
    196 }
    197 
    198 define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
    199 ; AVX-LABEL: merge_4i64_i64_1zzu:
    200 ; AVX:       # %bb.0:
    201 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    202 ; AVX-NEXT:    retq
    203 ;
    204 ; X32-AVX-LABEL: merge_4i64_i64_1zzu:
    205 ; X32-AVX:       # %bb.0:
    206 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    207 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    208 ; X32-AVX-NEXT:    retl
          ; Lane 0 is the i64 at element index 1 of %ptr, lanes 1-2 are the constant
          ; 0, and lane 3 is left undef. The assertions above expect a single vmovsd
          ; whose annotated result is "mem[0],zero".
    209   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
    210   %val0 = load i64, i64* %ptr0
    211   %res0 = insertelement <4 x i64> undef, i64 %val0, i32 0
    212   %res1 = insertelement <4 x i64> %res0, i64 0, i32 1
    213   %res2 = insertelement <4 x i64> %res1, i64 0, i32 2
    214   ret <4 x i64> %res2
    215 }
    216 
    217 define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
    218 ; AVX-LABEL: merge_4i64_i64_23zz:
    219 ; AVX:       # %bb.0:
    220 ; AVX-NEXT:    vmovups 16(%rdi), %xmm0
    221 ; AVX-NEXT:    retq
    222 ;
    223 ; X32-AVX-LABEL: merge_4i64_i64_23zz:
    224 ; X32-AVX:       # %bb.0:
    225 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    226 ; X32-AVX-NEXT:    vmovups 16(%eax), %xmm0
    227 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the i64 values at element indices 2 and 3 of %ptr, inserted
          ; into a zeroinitializer vector so lanes 2-3 are zero. The assertions above
          ; expect only a 16-byte vmovups at byte offset 16.
    228   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
    229   %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
    230   %val0 = load i64, i64* %ptr0
    231   %val1 = load i64, i64* %ptr1
    232   %res0 = insertelement <4 x i64> zeroinitializer, i64 %val0, i32 0
    233   %res1 = insertelement <4 x i64> %res0, i64 %val1, i32 1
    234   ret <4 x i64> %res1
    235 }
    236 
    237 define <8 x float> @merge_8f32_2f32_23z5(<2 x float>* %ptr) nounwind uwtable noinline ssp {
    238 ; AVX1-LABEL: merge_8f32_2f32_23z5:
    239 ; AVX1:       # %bb.0:
    240 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    241 ; AVX1-NEXT:    vmovups 16(%rdi), %xmm1
    242 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    243 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    244 ; AVX1-NEXT:    retq
    245 ;
    246 ; AVX2-LABEL: merge_8f32_2f32_23z5:
    247 ; AVX2:       # %bb.0:
    248 ; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    249 ; AVX2-NEXT:    vmovdqu 16(%rdi), %xmm1
    250 ; AVX2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    251 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    252 ; AVX2-NEXT:    retq
    253 ;
    254 ; AVX512F-LABEL: merge_8f32_2f32_23z5:
    255 ; AVX512F:       # %bb.0:
    256 ; AVX512F-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
    257 ; AVX512F-NEXT:    vmovdqu 16(%rdi), %xmm1
    258 ; AVX512F-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
    259 ; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    260 ; AVX512F-NEXT:    retq
    261 ;
    262 ; X32-AVX-LABEL: merge_8f32_2f32_23z5:
    263 ; X32-AVX:       # %bb.0:
    264 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    265 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    266 ; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4,5],mem[6,7]
    267 ; X32-AVX-NEXT:    retl
          ; Concatenate four <2 x float> pieces: element indices 2 and 3 of %ptr, a
          ; zero pair, and element index 5 of %ptr. Unlike the scalar-f64 variant of
          ; this pattern, the 64-bit assertions above do not show one merged load:
          ; they expect a 16-byte load at byte offset 16, a separate vmovq shifted
          ; into the upper 8 bytes with vpslldq, and a 128-bit insert (the float or
          ; integer domain form depends on the feature set). The 32-bit assertions
          ; expect the vxorps + vblendps form instead.
    268   %ptr0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 2
    269   %ptr1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
    270   %ptr3 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 5
    271   %val0 = load <2 x float>, <2 x float>* %ptr0
    272   %val1 = load <2 x float>, <2 x float>* %ptr1
    273   %val3 = load <2 x float>, <2 x float>* %ptr3
    274   %res01 = shufflevector <2 x float> %val0, <2 x float> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    275   %res23 = shufflevector <2 x float> zeroinitializer, <2 x float> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    276   %res = shufflevector <4 x float> %res01, <4 x float> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    277   ret <8 x float> %res
    278 }
    279 
    280 define <8 x float> @merge_8f32_4f32_z2(<4 x float>* %ptr) nounwind uwtable noinline ssp {
    281 ; AVX-LABEL: merge_8f32_4f32_z2:
    282 ; AVX:       # %bb.0:
    283 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    284 ; AVX-NEXT:    vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
    285 ; AVX-NEXT:    retq
    286 ;
    287 ; X32-AVX-LABEL: merge_8f32_4f32_z2:
    288 ; X32-AVX:       # %bb.0:
    289 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    290 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    291 ; X32-AVX-NEXT:    vinsertf128 $1, 32(%eax), %ymm0, %ymm0
    292 ; X32-AVX-NEXT:    retl
          ; Lower half is zero; upper half is the <4 x float> at element index 2 of
          ; %ptr. The assertions above expect a zeroed register followed by a
          ; vinsertf128 that takes its upper half straight from memory at byte
          ; offset 32.
    293   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
    294   %val1 = load <4 x float>, <4 x float>* %ptr1
    295   %res = shufflevector <4 x float> zeroinitializer, <4 x float> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    296   ret <8 x float> %res
    297 }
    298 
    299 define <8 x float> @merge_8f32_f32_12zzuuzz(float* %ptr) nounwind uwtable noinline ssp {
    300 ; AVX-LABEL: merge_8f32_f32_12zzuuzz:
    301 ; AVX:       # %bb.0:
    302 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    303 ; AVX-NEXT:    retq
    304 ;
    305 ; X32-AVX-LABEL: merge_8f32_f32_12zzuuzz:
    306 ; X32-AVX:       # %bb.0:
    307 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    308 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    309 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the floats at element indices 1 and 2 of %ptr; lanes 2, 3,
          ; 6 and 7 are 0.0; lanes 4-5 are never written and stay undef. The
          ; assertions above expect the whole vector to come from one vmovsd.
    310   %ptr0 = getelementptr inbounds float, float* %ptr, i64 1
    311   %ptr1 = getelementptr inbounds float, float* %ptr, i64 2
    312   %val0 = load float, float* %ptr0
    313   %val1 = load float, float* %ptr1
    314   %res0 = insertelement <8 x float> undef, float %val0, i32 0
    315   %res1 = insertelement <8 x float> %res0, float %val1, i32 1
    316   %res2 = insertelement <8 x float> %res1, float   0.0, i32 2
    317   %res3 = insertelement <8 x float> %res2, float   0.0, i32 3
    318   %res6 = insertelement <8 x float> %res3, float   0.0, i32 6
    319   %res7 = insertelement <8 x float> %res6, float   0.0, i32 7
    320   ret <8 x float> %res7
    321 }
    322 
    323 define <8 x float> @merge_8f32_f32_1u3u5zu8(float* %ptr) nounwind uwtable noinline ssp {
    324 ; AVX-LABEL: merge_8f32_f32_1u3u5zu8:
    325 ; AVX:       # %bb.0:
    326 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    327 ; AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
    328 ; AVX-NEXT:    retq
    329 ;
    330 ; X32-AVX-LABEL: merge_8f32_f32_1u3u5zu8:
    331 ; X32-AVX:       # %bb.0:
    332 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    333 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    334 ; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
    335 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 2, 4 and 7 are the floats at element indices 1, 3, 5 and 8 of
          ; %ptr; lane 5 is 0.0; lanes 1, 3 and 6 are never written and stay undef.
          ; The assertions above expect a zeroed register blended with a memory
          ; operand, where only lane 5 is taken from the zero register.
    336   %ptr0 = getelementptr inbounds float, float* %ptr, i64 1
    337   %ptr2 = getelementptr inbounds float, float* %ptr, i64 3
    338   %ptr4 = getelementptr inbounds float, float* %ptr, i64 5
    339   %ptr7 = getelementptr inbounds float, float* %ptr, i64 8
    340   %val0 = load float, float* %ptr0
    341   %val2 = load float, float* %ptr2
    342   %val4 = load float, float* %ptr4
    343   %val7 = load float, float* %ptr7
    344   %res0 = insertelement <8 x float> undef, float %val0, i32 0
    345   %res2 = insertelement <8 x float> %res0, float %val2, i32 2
    346   %res4 = insertelement <8 x float> %res2, float %val4, i32 4
    347   %res5 = insertelement <8 x float> %res4, float   0.0, i32 5
    348   %res7 = insertelement <8 x float> %res5, float %val7, i32 7
    349   ret <8 x float> %res7
    350 }
    351 
    352 define <8 x i32> @merge_8i32_4i32_z3(<4 x i32>* %ptr) nounwind uwtable noinline ssp {
    353 ; AVX-LABEL: merge_8i32_4i32_z3:
    354 ; AVX:       # %bb.0:
    355 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    356 ; AVX-NEXT:    vinsertf128 $1, 48(%rdi), %ymm0, %ymm0
    357 ; AVX-NEXT:    retq
    358 ;
    359 ; X32-AVX-LABEL: merge_8i32_4i32_z3:
    360 ; X32-AVX:       # %bb.0:
    361 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    362 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    363 ; X32-AVX-NEXT:    vinsertf128 $1, 48(%eax), %ymm0, %ymm0
    364 ; X32-AVX-NEXT:    retl
          ; Lower half is zero; upper half is the <4 x i32> at element index 3 of
          ; %ptr. The assertions above expect a zeroed register followed by a
          ; vinsertf128 that takes its upper half straight from memory at byte
          ; offset 48.
    365   %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
    366   %val1 = load <4 x i32>, <4 x i32>* %ptr1
    367   %res = shufflevector <4 x i32> zeroinitializer, <4 x i32> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    368   ret <8 x i32> %res
    369 }
    370 
    371 define <8 x i32> @merge_8i32_i32_56zz9uzz(i32* %ptr) nounwind uwtable noinline ssp {
    372 ; AVX-LABEL: merge_8i32_i32_56zz9uzz:
    373 ; AVX:       # %bb.0:
    374 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    375 ; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    376 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    377 ; AVX-NEXT:    retq
    378 ;
    379 ; X32-AVX-LABEL: merge_8i32_i32_56zz9uzz:
    380 ; X32-AVX:       # %bb.0:
    381 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    382 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    383 ; X32-AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    384 ; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    385 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the i32 values at element indices 5 and 6 of %ptr; lane 4
          ; is element index 9; lanes 2, 3, 6 and 7 are 0; lane 5 is never written
          ; and stays undef. The assertions above expect each 128-bit half to be
          ; loaded separately (vmovsd for the low half, vmovss for the high half)
          ; and then joined with vinsertf128.
    386   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 5
    387   %ptr1 = getelementptr inbounds i32, i32* %ptr, i64 6
    388   %ptr4 = getelementptr inbounds i32, i32* %ptr, i64 9
    389   %val0 = load i32, i32* %ptr0
    390   %val1 = load i32, i32* %ptr1
    391   %val4 = load i32, i32* %ptr4
    392   %res0 = insertelement <8 x i32> undef, i32 %val0, i32 0
    393   %res1 = insertelement <8 x i32> %res0, i32 %val1, i32 1
    394   %res2 = insertelement <8 x i32> %res1, i32     0, i32 2
    395   %res3 = insertelement <8 x i32> %res2, i32     0, i32 3
    396   %res4 = insertelement <8 x i32> %res3, i32 %val4, i32 4
    397   %res6 = insertelement <8 x i32> %res4, i32     0, i32 6
    398   %res7 = insertelement <8 x i32> %res6, i32     0, i32 7
    399   ret <8 x i32> %res7
    400 }
    401 
    402 define <8 x i32> @merge_8i32_i32_1u3u5zu8(i32* %ptr) nounwind uwtable noinline ssp {
    403 ; AVX-LABEL: merge_8i32_i32_1u3u5zu8:
    404 ; AVX:       # %bb.0:
    405 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    406 ; AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
    407 ; AVX-NEXT:    retq
    408 ;
    409 ; X32-AVX-LABEL: merge_8i32_i32_1u3u5zu8:
    410 ; X32-AVX:       # %bb.0:
    411 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    412 ; X32-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
    413 ; X32-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4],ymm0[5],mem[6,7]
    414 ; X32-AVX-NEXT:    retl
          ; Integer counterpart of the <8 x float> 1u3u5zu8 pattern: lanes 0, 2, 4
          ; and 7 are the i32 values at element indices 1, 3, 5 and 8 of %ptr; lane
          ; 5 is 0; lanes 1, 3 and 6 stay undef. The assertions above expect a
          ; zeroed register blended with a memory operand, where only lane 5 is
          ; taken from the zero register.
    415   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 1
    416   %ptr2 = getelementptr inbounds i32, i32* %ptr, i64 3
    417   %ptr4 = getelementptr inbounds i32, i32* %ptr, i64 5
    418   %ptr7 = getelementptr inbounds i32, i32* %ptr, i64 8
    419   %val0 = load i32, i32* %ptr0
    420   %val2 = load i32, i32* %ptr2
    421   %val4 = load i32, i32* %ptr4
    422   %val7 = load i32, i32* %ptr7
    423   %res0 = insertelement <8 x i32> undef, i32 %val0, i32 0
    424   %res2 = insertelement <8 x i32> %res0, i32 %val2, i32 2
    425   %res4 = insertelement <8 x i32> %res2, i32 %val4, i32 4
    426   %res5 = insertelement <8 x i32> %res4, i32     0, i32 5
    427   %res7 = insertelement <8 x i32> %res5, i32 %val7, i32 7
    428   ret <8 x i32> %res7
    429 }
    430 
    431 define <16 x i16> @merge_16i16_i16_89zzzuuuuuuuuuuuz(i16* %ptr) nounwind uwtable noinline ssp {
    432 ; AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
    433 ; AVX:       # %bb.0:
    434 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    435 ; AVX-NEXT:    retq
    436 ;
    437 ; X32-AVX-LABEL: merge_16i16_i16_89zzzuuuuuuuuuuuz:
    438 ; X32-AVX:       # %bb.0:
    439 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    440 ; X32-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    441 ; X32-AVX-NEXT:    retl
          ; Lanes 0-1 are the i16 values at element indices 8 and 9 of %ptr; lanes
          ; 2, 3, 4 and 15 are 0; every other lane is never written and stays undef.
          ; The assertions above expect the whole vector to come from one vmovss.
          ; Note that the insertelement lane operands here are typed i16, not i32.
    442   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 8
    443   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 9
    444   %val0 = load i16, i16* %ptr0
    445   %val1 = load i16, i16* %ptr1
    446   %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
    447   %res1 = insertelement <16 x i16> %res0, i16 %val1, i16 1
    448   %res2 = insertelement <16 x i16> %res1, i16     0, i16 2
    449   %res3 = insertelement <16 x i16> %res2, i16     0, i16 3
    450   %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
    451   %resF = insertelement <16 x i16> %res4, i16     0, i16 15
    452   ret <16 x i16> %resF
    453 }
    454 
    455 define <16 x i16> @merge_16i16_i16_45u7uuuuuuuuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
    456 ; AVX-LABEL: merge_16i16_i16_45u7uuuuuuuuuuuu:
    457 ; AVX:       # %bb.0:
    458 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    459 ; AVX-NEXT:    retq
    460 ;
    461 ; X32-AVX-LABEL: merge_16i16_i16_45u7uuuuuuuuuuuu:
    462 ; X32-AVX:       # %bb.0:
    463 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    464 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    465 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 1 and 3 are the i16 values at element indices 4, 5 and 7 of
          ; %ptr; lane 2 and lanes 4-15 are never written and stay undef. The
          ; assertions above expect a single vmovsd even though element index 6 is
          ; not loaded in the IR.
    466   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 4
    467   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 5
    468   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 7
    469   %val0 = load i16, i16* %ptr0
    470   %val1 = load i16, i16* %ptr1
    471   %val3 = load i16, i16* %ptr3
    472   %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
    473   %res1 = insertelement <16 x i16> %res0, i16 %val1, i16 1
    474   %res3 = insertelement <16 x i16> %res1, i16 %val3, i16 3
    475   ret <16 x i16> %res3
    476 }
    477 
    478 define <16 x i16> @merge_16i16_i16_0uu3uuuuuuuuCuEF(i16* %ptr) nounwind uwtable noinline ssp {
    479 ; AVX-LABEL: merge_16i16_i16_0uu3uuuuuuuuCuEF:
    480 ; AVX:       # %bb.0:
    481 ; AVX-NEXT:    vmovups (%rdi), %ymm0
    482 ; AVX-NEXT:    retq
    483 ;
    484 ; X32-AVX-LABEL: merge_16i16_i16_0uu3uuuuuuuuCuEF:
    485 ; X32-AVX:       # %bb.0:
    486 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    487 ; X32-AVX-NEXT:    vmovups (%eax), %ymm0
    488 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 3, 12, 14 and 15 each receive the i16 at the same element index
          ; of %ptr; every other lane is never written and stays undef. Because the
          ; first and last lanes are both loaded, the assertions above expect one
          ; 32-byte vmovups from offset 0 that spans the whole range.
    489   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
    490   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
    491   %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
    492   %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
    493   %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
    494   %val0 = load i16, i16* %ptr0
    495   %val3 = load i16, i16* %ptr3
    496   %valC = load i16, i16* %ptrC
    497   %valE = load i16, i16* %ptrE
    498   %valF = load i16, i16* %ptrF
    499   %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
    500   %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
    501   %resC = insertelement <16 x i16> %res3, i16 %valC, i16 12
    502   %resE = insertelement <16 x i16> %resC, i16 %valE, i16 14
    503   %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
    504   ret <16 x i16> %resF
    505 }
    506 
    507 define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF(i16* %ptr) nounwind uwtable noinline ssp {
    508 ; AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
    509 ; AVX:       # %bb.0:
    510 ; AVX-NEXT:    vmovups (%rdi), %ymm0
    511 ; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
    512 ; AVX-NEXT:    retq
    513 ;
    514 ; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF:
    515 ; X32-AVX:       # %bb.0:
    516 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    517 ; X32-AVX-NEXT:    vmovups (%eax), %ymm0
    518 ; X32-AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
    519 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 3, 12, 14 and 15 each receive the i16 at the same element index
          ; of %ptr; lanes 4, 5 and 13 are 0; the remaining lanes stay undef. The
          ; assertions above expect one 32-byte vmovups from offset 0 followed by a
          ; vandps with a constant-pool operand, which supplies the zero lanes.
          ; NOTE(review): read position by position, the function name appears to
          ; place its third 'z' at lane 11 and a 'u' at lane 13, whereas the IR
          ; zeroes lane 13 and never writes lane 11 - confirm which was intended.
    520   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
    521   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
    522   %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
    523   %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
    524   %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
    525   %val0 = load i16, i16* %ptr0
    526   %val3 = load i16, i16* %ptr3
    527   %valC = load i16, i16* %ptrC
    528   %valE = load i16, i16* %ptrE
    529   %valF = load i16, i16* %ptrF
    530   %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
    531   %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
    532   %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
    533   %res5 = insertelement <16 x i16> %res4, i16     0, i16 5
    534   %resC = insertelement <16 x i16> %res5, i16 %valC, i16 12
    535   %resD = insertelement <16 x i16> %resC, i16     0, i16 13
    536   %resE = insertelement <16 x i16> %resD, i16 %valE, i16 14
    537   %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
    538   ret <16 x i16> %resF
    539 }
    540 
    541 define <32 x i8> @merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
    542 ; AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    543 ; AVX:       # %bb.0:
    544 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    545 ; AVX-NEXT:    retq
    546 ;
    547 ; X32-AVX-LABEL: merge_32i8_i8_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
    548 ; X32-AVX:       # %bb.0:
    549 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    550 ; X32-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    551 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 1 and 3 are the bytes at element indices 4, 5 and 7 of %ptr;
          ; lane 2 and lanes 4-31 are never written and stay undef. The assertions
          ; above expect a single vmovss even though element index 6 is not loaded
          ; in the IR. The insertelement lane operands here are typed i8.
    552   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 4
    553   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 5
    554   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 7
    555   %val0 = load i8, i8* %ptr0
    556   %val1 = load i8, i8* %ptr1
    557   %val3 = load i8, i8* %ptr3
    558   %res0 = insertelement <32 x i8> undef, i8 %val0, i8 0
    559   %res1 = insertelement <32 x i8> %res0, i8 %val1, i8 1
    560   %res3 = insertelement <32 x i8> %res1, i8 %val3, i8 3
    561   ret <32 x i8> %res3
    562 }
    563 
    564 define <32 x i8> @merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu(i8* %ptr) nounwind uwtable noinline ssp {
    565 ; AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    566 ; AVX:       # %bb.0:
    567 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    568 ; AVX-NEXT:    retq
    569 ;
    570 ; X32-AVX-LABEL: merge_32i8_i8_23u5uuuuuuuuuuzzzzuuuuuuuuuuuuuu:
    571 ; X32-AVX:       # %bb.0:
    572 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    573 ; X32-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    574 ; X32-AVX-NEXT:    retl
          ; Lanes 0, 1 and 3 are the bytes at element indices 2, 3 and 5 of %ptr;
          ; lanes 14-17 are 0, a zero run that crosses the boundary between the two
          ; 128-bit halves; every other lane stays undef. The assertions above
          ; still expect the whole vector to come from one vmovss.
    575   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 2
    576   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 3
    577   %ptr3 = getelementptr inbounds i8, i8* %ptr, i64 5
    578   %val0 = load i8, i8* %ptr0
    579   %val1 = load i8, i8* %ptr1
    580   %val3 = load i8, i8* %ptr3
    581   %res0 = insertelement <32 x i8> undef, i8 %val0, i8 0
    582   %res1 = insertelement <32 x i8> %res0, i8 %val1, i8 1
    583   %res3 = insertelement <32 x i8> %res1, i8 %val3, i8 3
    584   %resE = insertelement <32 x i8> %res3, i8     0, i8 14
    585   %resF = insertelement <32 x i8> %resE, i8     0, i8 15
    586   %resG = insertelement <32 x i8> %resF, i8     0, i8 16
    587   %resH = insertelement <32 x i8> %resG, i8     0, i8 17
    588   ret <32 x i8> %resH
    589 }
    590 
    591 ;
    592 ; consecutive loads including any/all volatiles may not be combined
    593 ;
    594 
    595 define <4 x double> @merge_4f64_f64_34uz_volatile(double* %ptr) nounwind uwtable noinline ssp {
    596 ; AVX-LABEL: merge_4f64_f64_34uz_volatile:
    597 ; AVX:       # %bb.0:
    598 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    599 ; AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    600 ; AVX-NEXT:    retq
    601 ;
    602 ; X32-AVX-LABEL: merge_4f64_f64_34uz_volatile:
    603 ; X32-AVX:       # %bb.0:
    604 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    605 ; X32-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    606 ; X32-AVX-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
    607 ; X32-AVX-NEXT:    retl
          ; Negative test: both scalar loads (element indices 3 and 4 of %ptr) are
          ; volatile. Lanes 0-1 take those values, lane 3 is 0.0 and lane 2 stays
          ; undef. The assertions above expect the two memory accesses to remain
          ; separate instructions (vmovsd then vmovhpd) rather than one merged
          ; 16-byte load.
    608   %ptr0 = getelementptr inbounds double, double* %ptr, i64 3
    609   %ptr1 = getelementptr inbounds double, double* %ptr, i64 4
    610   %val0 = load volatile double, double* %ptr0
    611   %val1 = load volatile double, double* %ptr1
    612   %res0 = insertelement <4 x double> undef, double %val0, i32 0
    613   %res1 = insertelement <4 x double> %res0, double %val1, i32 1
    614   %res3 = insertelement <4 x double> %res1, double   0.0, i32 3
    615   ret <4 x double> %res3
    616 }
    617 
    618 define <16 x i16> @merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile(i16* %ptr) nounwind uwtable noinline ssp {
    619 ; AVX1-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
    620 ; AVX1:       # %bb.0:
    621 ; AVX1-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    622 ; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
    623 ; AVX1-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
    624 ; AVX1-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
    625 ; AVX1-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
    626 ; AVX1-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
    627 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    628 ; AVX1-NEXT:    retq
    629 ;
    630 ; AVX2-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
    631 ; AVX2:       # %bb.0:
    632 ; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    633 ; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
    634 ; AVX2-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
    635 ; AVX2-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
    636 ; AVX2-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
    637 ; AVX2-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
    638 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    639 ; AVX2-NEXT:    retq
    640 ;
    641 ; AVX512F-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
    642 ; AVX512F:       # %bb.0:
    643 ; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    644 ; AVX512F-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm1
    645 ; AVX512F-NEXT:    vpinsrw $4, 24(%rdi), %xmm0, %xmm0
    646 ; AVX512F-NEXT:    vpinsrw $6, 28(%rdi), %xmm0, %xmm0
    647 ; AVX512F-NEXT:    vpinsrw $7, 30(%rdi), %xmm0, %xmm0
    648 ; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm1, %xmm1
    649 ; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    650 ; AVX512F-NEXT:    retq
    651 ;
    652 ; X32-AVX-LABEL: merge_16i16_i16_0uu3zzuuuuuzCuEF_volatile:
    653 ; X32-AVX:       # %bb.0:
    654 ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
    655 ; X32-AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    656 ; X32-AVX-NEXT:    vpinsrw $0, (%eax), %xmm0, %xmm1
    657 ; X32-AVX-NEXT:    vpinsrw $4, 24(%eax), %xmm0, %xmm0
    658 ; X32-AVX-NEXT:    vpinsrw $6, 28(%eax), %xmm0, %xmm0
    659 ; X32-AVX-NEXT:    vpinsrw $7, 30(%eax), %xmm0, %xmm0
    660 ; X32-AVX-NEXT:    vpinsrw $3, 6(%eax), %xmm1, %xmm1
    661 ; X32-AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    662 ; X32-AVX-NEXT:    retl
          ; Negative test: same lane layout as the non-volatile 0uu3zzuuuuuzCuEF
          ; case (lanes 0, 3, 12, 14, 15 loaded; lanes 4, 5, 13 zero), but the loads
          ; feeding lanes 0 and 15 are volatile. The assertions above expect no
          ; merged 32-byte load: each of the five values is inserted from memory by
          ; its own vpinsrw into a zeroed register, and the two halves are joined
          ; with a 128-bit insert.
          ; NOTE(review): read position by position, the function name appears to
          ; place its third 'z' at lane 11 and a 'u' at lane 13, whereas the IR
          ; zeroes lane 13 and never writes lane 11 - confirm which was intended.
    663   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 0
    664   %ptr3 = getelementptr inbounds i16, i16* %ptr, i64 3
    665   %ptrC = getelementptr inbounds i16, i16* %ptr, i64 12
    666   %ptrE = getelementptr inbounds i16, i16* %ptr, i64 14
    667   %ptrF = getelementptr inbounds i16, i16* %ptr, i64 15
    668   %val0 = load volatile i16, i16* %ptr0
    669   %val3 = load i16, i16* %ptr3
    670   %valC = load i16, i16* %ptrC
    671   %valE = load i16, i16* %ptrE
    672   %valF = load volatile i16, i16* %ptrF
    673   %res0 = insertelement <16 x i16> undef, i16 %val0, i16 0
    674   %res3 = insertelement <16 x i16> %res0, i16 %val3, i16 3
    675   %res4 = insertelement <16 x i16> %res3, i16     0, i16 4
    676   %res5 = insertelement <16 x i16> %res4, i16     0, i16 5
    677   %resC = insertelement <16 x i16> %res5, i16 %valC, i16 12
    678   %resD = insertelement <16 x i16> %resC, i16     0, i16 13
    679   %resE = insertelement <16 x i16> %resD, i16 %valE, i16 14
    680   %resF = insertelement <16 x i16> %resE, i16 %valF, i16 15
    681   ret <16 x i16> %resF
    682 }
    683