; Tests for legacy X86 AVX2 intrinsics that are auto-upgraded by the IR parser.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X86 --check-prefix=X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=X64 --check-prefix=X64-AVX512

      7 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
      8 ; X86-LABEL: test_x86_avx2_pblendw:
      9 ; X86:       ## %bb.0:
     10 ; X86-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
     11 ; X86-NEXT:    retl
     12 ;
     13 ; X64-LABEL: test_x86_avx2_pblendw:
     14 ; X64:       ## %bb.0:
     15 ; X64-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
     16 ; X64-NEXT:    retq
     17   %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
     18   ret <16 x i16> %res
     19 }
     20 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
     21 
     22 
     23 define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
     24 ; X86-LABEL: test_x86_avx2_pblendd_128:
     25 ; X86:       ## %bb.0:
     26 ; X86-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
     27 ; X86-NEXT:    retl
     28 ;
     29 ; X64-LABEL: test_x86_avx2_pblendd_128:
     30 ; X64:       ## %bb.0:
     31 ; X64-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
     32 ; X64-NEXT:    retq
     33   %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
     34   ret <4 x i32> %res
     35 }
     36 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
     37 
     38 
     39 define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
     40 ; X86-LABEL: test_x86_avx2_pblendd_256:
     41 ; X86:       ## %bb.0:
     42 ; X86-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
     43 ; X86-NEXT:    retl
     44 ;
     45 ; X64-LABEL: test_x86_avx2_pblendd_256:
     46 ; X64:       ## %bb.0:
     47 ; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
     48 ; X64-NEXT:    retq
     49   %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
     50   ret <8 x i32> %res
     51 }
     52 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
     53 
     54 
     55 define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
     56 ; X86-LABEL: test_x86_avx2_movntdqa:
     57 ; X86:       ## %bb.0:
     58 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     59 ; X86-NEXT:    vmovntdqa (%eax), %ymm0
     60 ; X86-NEXT:    retl
     61 ;
     62 ; X64-LABEL: test_x86_avx2_movntdqa:
     63 ; X64:       ## %bb.0:
     64 ; X64-NEXT:    vmovntdqa (%rdi), %ymm0
     65 ; X64-NEXT:    retq
     66   %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
     67   ret <4 x i64> %res
     68 }
     69 declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
     70 
     71 
     72 define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
     73 ; X86-LABEL: test_x86_avx2_mpsadbw:
     74 ; X86:       ## %bb.0:
     75 ; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
     76 ; X86-NEXT:    retl
     77 ;
     78 ; X64-LABEL: test_x86_avx2_mpsadbw:
     79 ; X64:       ## %bb.0:
     80 ; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
     81 ; X64-NEXT:    retq
     82   %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
     83   ret <16 x i16> %res
     84 }
     85 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
     86 
     87 
     88 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
     89 ; X86-LABEL: test_x86_avx2_psll_dq_bs:
     90 ; X86:       ## %bb.0:
     91 ; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
     92 ; X86-NEXT:    retl
     93 ;
     94 ; X64-LABEL: test_x86_avx2_psll_dq_bs:
     95 ; X64:       ## %bb.0:
     96 ; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
     97 ; X64-NEXT:    retq
     98   %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
     99   ret <4 x i64> %res
    100 }
    101 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
    102 
    103 
    104 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
    105 ; X86-LABEL: test_x86_avx2_psrl_dq_bs:
    106 ; X86:       ## %bb.0:
    107 ; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
    108 ; X86-NEXT:    retl
    109 ;
    110 ; X64-LABEL: test_x86_avx2_psrl_dq_bs:
    111 ; X64:       ## %bb.0:
    112 ; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
    113 ; X64-NEXT:    retq
    114   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
    115   ret <4 x i64> %res
    116 }
    117 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
    118 
    119 
    120 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
    121 ; X86-LABEL: test_x86_avx2_psll_dq:
    122 ; X86:       ## %bb.0:
    123 ; X86-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
    124 ; X86-NEXT:    retl
    125 ;
    126 ; X64-LABEL: test_x86_avx2_psll_dq:
    127 ; X64:       ## %bb.0:
    128 ; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
    129 ; X64-NEXT:    retq
    130   %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
    131   ret <4 x i64> %res
    132 }
    133 declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
    134 
    135 
    136 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
    137 ; X86-LABEL: test_x86_avx2_psrl_dq:
    138 ; X86:       ## %bb.0:
    139 ; X86-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
    140 ; X86-NEXT:    retl
    141 ;
    142 ; X64-LABEL: test_x86_avx2_psrl_dq:
    143 ; X64:       ## %bb.0:
    144 ; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
    145 ; X64-NEXT:    retq
    146   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
    147   ret <4 x i64> %res
    148 }
    149 declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
    150 
    151 
    152 define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
    153 ; X86-LABEL: test_x86_avx2_vextracti128:
    154 ; X86:       ## %bb.0:
    155 ; X86-NEXT:    vextractf128 $1, %ymm0, %xmm0
    156 ; X86-NEXT:    vzeroupper
    157 ; X86-NEXT:    retl
    158 ;
    159 ; X64-LABEL: test_x86_avx2_vextracti128:
    160 ; X64:       ## %bb.0:
    161 ; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
    162 ; X64-NEXT:    vzeroupper
    163 ; X64-NEXT:    retq
    164   %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
    165   ret <2 x i64> %res
    166 }
    167 declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
    168 
    169 
    170 define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
    171 ; X86-LABEL: test_x86_avx2_vinserti128:
    172 ; X86:       ## %bb.0:
    173 ; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    174 ; X86-NEXT:    retl
    175 ;
    176 ; X64-LABEL: test_x86_avx2_vinserti128:
    177 ; X64:       ## %bb.0:
    178 ; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    179 ; X64-NEXT:    retq
    180   %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
    181   ret <4 x i64> %res
    182 }
    183 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
    184 
    185 
    186 define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
    187 ; X86-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
    188 ; X86:       ## %bb.0:
    189 ; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
    190 ; X86-NEXT:    retl
    191 ;
    192 ; X64-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
    193 ; X64:       ## %bb.0:
    194 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    195 ; X64-NEXT:    retq
    196   %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
    197   ret <4 x double> %res
    198 }
    199 declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
    200 
    201 
    202 define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
    203 ; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps:
    204 ; X86:       ## %bb.0:
    205 ; X86-NEXT:    vbroadcastss %xmm0, %xmm0
    206 ; X86-NEXT:    retl
    207 ;
    208 ; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps:
    209 ; X64:       ## %bb.0:
    210 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    211 ; X64-NEXT:    retq
    212   %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
    213   ret <4 x float> %res
    214 }
    215 declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
    216 
    217 
    218 define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
    219 ; X86-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
    220 ; X86:       ## %bb.0:
    221 ; X86-NEXT:    vbroadcastss %xmm0, %ymm0
    222 ; X86-NEXT:    retl
    223 ;
    224 ; X64-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
    225 ; X64:       ## %bb.0:
    226 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    227 ; X64-NEXT:    retq
    228   %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
    229   ret <8 x float> %res
    230 }
    231 declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
    232 
    233 
    234 define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
    235 ; X86-LABEL: test_x86_avx2_pbroadcastb_128:
    236 ; X86:       ## %bb.0:
    237 ; X86-NEXT:    vpbroadcastb %xmm0, %xmm0
    238 ; X86-NEXT:    retl
    239 ;
    240 ; X64-LABEL: test_x86_avx2_pbroadcastb_128:
    241 ; X64:       ## %bb.0:
    242 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
    243 ; X64-NEXT:    retq
    244   %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
    245   ret <16 x i8> %res
    246 }
    247 declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
    248 
    249 
    250 define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
    251 ; X86-LABEL: test_x86_avx2_pbroadcastb_256:
    252 ; X86:       ## %bb.0:
    253 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm0
    254 ; X86-NEXT:    retl
    255 ;
    256 ; X64-LABEL: test_x86_avx2_pbroadcastb_256:
    257 ; X64:       ## %bb.0:
    258 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
    259 ; X64-NEXT:    retq
    260   %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
    261   ret <32 x i8> %res
    262 }
    263 declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
    264 
    265 
    266 define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
    267 ; X86-LABEL: test_x86_avx2_pbroadcastw_128:
    268 ; X86:       ## %bb.0:
    269 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm0
    270 ; X86-NEXT:    retl
    271 ;
    272 ; X64-LABEL: test_x86_avx2_pbroadcastw_128:
    273 ; X64:       ## %bb.0:
    274 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
    275 ; X64-NEXT:    retq
    276   %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
    277   ret <8 x i16> %res
    278 }
    279 declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
    280 
    281 
    282 define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
    283 ; X86-LABEL: test_x86_avx2_pbroadcastw_256:
    284 ; X86:       ## %bb.0:
    285 ; X86-NEXT:    vpbroadcastw %xmm0, %ymm0
    286 ; X86-NEXT:    retl
    287 ;
    288 ; X64-LABEL: test_x86_avx2_pbroadcastw_256:
    289 ; X64:       ## %bb.0:
    290 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
    291 ; X64-NEXT:    retq
    292   %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
    293   ret <16 x i16> %res
    294 }
    295 declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
    296 
    297 
    298 define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
    299 ; X86-LABEL: test_x86_avx2_pbroadcastd_128:
    300 ; X86:       ## %bb.0:
    301 ; X86-NEXT:    vbroadcastss %xmm0, %xmm0
    302 ; X86-NEXT:    retl
    303 ;
    304 ; X64-LABEL: test_x86_avx2_pbroadcastd_128:
    305 ; X64:       ## %bb.0:
    306 ; X64-NEXT:    vbroadcastss %xmm0, %xmm0
    307 ; X64-NEXT:    retq
    308   %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
    309   ret <4 x i32> %res
    310 }
    311 declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
    312 
    313 
    314 define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
    315 ; X86-LABEL: test_x86_avx2_pbroadcastd_256:
    316 ; X86:       ## %bb.0:
    317 ; X86-NEXT:    vbroadcastss %xmm0, %ymm0
    318 ; X86-NEXT:    retl
    319 ;
    320 ; X64-LABEL: test_x86_avx2_pbroadcastd_256:
    321 ; X64:       ## %bb.0:
    322 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    323 ; X64-NEXT:    retq
    324   %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
    325   ret <8 x i32> %res
    326 }
    327 declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
    328 
    329 
    330 define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
    331 ; X86-LABEL: test_x86_avx2_pbroadcastq_128:
    332 ; X86:       ## %bb.0:
    333 ; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
    334 ; X86-NEXT:    retl
    335 ;
    336 ; X64-LABEL: test_x86_avx2_pbroadcastq_128:
    337 ; X64:       ## %bb.0:
    338 ; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
    339 ; X64-NEXT:    retq
    340   %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
    341   ret <2 x i64> %res
    342 }
    343 declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
    344 
    345 
    346 define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
    347 ; X86-LABEL: test_x86_avx2_pbroadcastq_256:
    348 ; X86:       ## %bb.0:
    349 ; X86-NEXT:    vbroadcastsd %xmm0, %ymm0
    350 ; X86-NEXT:    retl
    351 ;
    352 ; X64-LABEL: test_x86_avx2_pbroadcastq_256:
    353 ; X64:       ## %bb.0:
    354 ; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
    355 ; X64-NEXT:    retq
    356   %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
    357   ret <4 x i64> %res
    358 }
    359 declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
    360 
    361 
    362 define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
    363 ; X86-LABEL: test_x86_avx2_pmovsxbd:
    364 ; X86:       ## %bb.0:
    365 ; X86-NEXT:    vpmovsxbd %xmm0, %ymm0
    366 ; X86-NEXT:    retl
    367 ;
    368 ; X64-LABEL: test_x86_avx2_pmovsxbd:
    369 ; X64:       ## %bb.0:
    370 ; X64-NEXT:    vpmovsxbd %xmm0, %ymm0
    371 ; X64-NEXT:    retq
    372   %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
    373   ret <8 x i32> %res
    374 }
    375 declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
    376 
    377 
    378 define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
    379 ; X86-LABEL: test_x86_avx2_pmovsxbq:
    380 ; X86:       ## %bb.0:
    381 ; X86-NEXT:    vpmovsxbq %xmm0, %ymm0
    382 ; X86-NEXT:    retl
    383 ;
    384 ; X64-LABEL: test_x86_avx2_pmovsxbq:
    385 ; X64:       ## %bb.0:
    386 ; X64-NEXT:    vpmovsxbq %xmm0, %ymm0
    387 ; X64-NEXT:    retq
    388   %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
    389   ret <4 x i64> %res
    390 }
    391 declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
    392 
    393 
    394 define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
    395 ; X86-LABEL: test_x86_avx2_pmovsxbw:
    396 ; X86:       ## %bb.0:
    397 ; X86-NEXT:    vpmovsxbw %xmm0, %ymm0
    398 ; X86-NEXT:    retl
    399 ;
    400 ; X64-LABEL: test_x86_avx2_pmovsxbw:
    401 ; X64:       ## %bb.0:
    402 ; X64-NEXT:    vpmovsxbw %xmm0, %ymm0
    403 ; X64-NEXT:    retq
    404   %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    405   ret <16 x i16> %res
    406 }
    407 declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
    408 
    409 
    410 define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
    411 ; X86-LABEL: test_x86_avx2_pmovsxdq:
    412 ; X86:       ## %bb.0:
    413 ; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
    414 ; X86-NEXT:    retl
    415 ;
    416 ; X64-LABEL: test_x86_avx2_pmovsxdq:
    417 ; X64:       ## %bb.0:
    418 ; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
    419 ; X64-NEXT:    retq
    420   %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
    421   ret <4 x i64> %res
    422 }
    423 declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
    424 
    425 
    426 define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
    427 ; X86-LABEL: test_x86_avx2_pmovsxwd:
    428 ; X86:       ## %bb.0:
    429 ; X86-NEXT:    vpmovsxwd %xmm0, %ymm0
    430 ; X86-NEXT:    retl
    431 ;
    432 ; X64-LABEL: test_x86_avx2_pmovsxwd:
    433 ; X64:       ## %bb.0:
    434 ; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
    435 ; X64-NEXT:    retq
    436   %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
    437   ret <8 x i32> %res
    438 }
    439 declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
    440 
    441 
    442 define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
    443 ; X86-LABEL: test_x86_avx2_pmovsxwq:
    444 ; X86:       ## %bb.0:
    445 ; X86-NEXT:    vpmovsxwq %xmm0, %ymm0
    446 ; X86-NEXT:    retl
    447 ;
    448 ; X64-LABEL: test_x86_avx2_pmovsxwq:
    449 ; X64:       ## %bb.0:
    450 ; X64-NEXT:    vpmovsxwq %xmm0, %ymm0
    451 ; X64-NEXT:    retq
    452   %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
    453   ret <4 x i64> %res
    454 }
    455 declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
    456 
    457 
    458 define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
    459 ; X86-LABEL: test_x86_avx2_pmovzxbd:
    460 ; X86:       ## %bb.0:
    461 ; X86-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    462 ; X86-NEXT:    retl
    463 ;
    464 ; X64-LABEL: test_x86_avx2_pmovzxbd:
    465 ; X64:       ## %bb.0:
    466 ; X64-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
    467 ; X64-NEXT:    retq
    468   %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
    469   ret <8 x i32> %res
    470 }
    471 declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
    472 
    473 
    474 define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
    475 ; X86-LABEL: test_x86_avx2_pmovzxbq:
    476 ; X86:       ## %bb.0:
    477 ; X86-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
    478 ; X86-NEXT:    retl
    479 ;
    480 ; X64-LABEL: test_x86_avx2_pmovzxbq:
    481 ; X64:       ## %bb.0:
    482 ; X64-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
    483 ; X64-NEXT:    retq
    484   %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
    485   ret <4 x i64> %res
    486 }
    487 declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
    488 
    489 
    490 define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
    491 ; X86-LABEL: test_x86_avx2_pmovzxbw:
    492 ; X86:       ## %bb.0:
    493 ; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    494 ; X86-NEXT:    retl
    495 ;
    496 ; X64-LABEL: test_x86_avx2_pmovzxbw:
    497 ; X64:       ## %bb.0:
    498 ; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
    499 ; X64-NEXT:    retq
    500   %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
    501   ret <16 x i16> %res
    502 }
    503 declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
    504 
    505 
    506 define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
    507 ; X86-LABEL: test_x86_avx2_pmovzxdq:
    508 ; X86:       ## %bb.0:
    509 ; X86-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    510 ; X86-NEXT:    retl
    511 ;
    512 ; X64-LABEL: test_x86_avx2_pmovzxdq:
    513 ; X64:       ## %bb.0:
    514 ; X64-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    515 ; X64-NEXT:    retq
    516   %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
    517   ret <4 x i64> %res
    518 }
    519 declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
    520 
    521 
    522 define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
    523 ; X86-LABEL: test_x86_avx2_pmovzxwd:
    524 ; X86:       ## %bb.0:
    525 ; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    526 ; X86-NEXT:    retl
    527 ;
    528 ; X64-LABEL: test_x86_avx2_pmovzxwd:
    529 ; X64:       ## %bb.0:
    530 ; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    531 ; X64-NEXT:    retq
    532   %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
    533   ret <8 x i32> %res
    534 }
    535 declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
    536 
    537 
    538 define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
    539 ; X86-LABEL: test_x86_avx2_pmovzxwq:
    540 ; X86:       ## %bb.0:
    541 ; X86-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    542 ; X86-NEXT:    retl
    543 ;
    544 ; X64-LABEL: test_x86_avx2_pmovzxwq:
    545 ; X64:       ## %bb.0:
    546 ; X64-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    547 ; X64-NEXT:    retq
    548   %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
    549   ret <4 x i64> %res
    550 }
    551 declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
    552 
    553 ; This is checked here because the execution dependency fix pass makes it hard to test in AVX mode since we don't have 256-bit integer instructions
    554 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
    555   ; add operation forces the execution domain.
    556 ; X86-LABEL: test_x86_avx_storeu_dq_256:
    557 ; X86:       ## %bb.0:
    558 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
    559 ; X86-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
    560 ; X86-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
    561 ; X86-NEXT:    vmovdqu %ymm0, (%eax)
    562 ; X86-NEXT:    vzeroupper
    563 ; X86-NEXT:    retl
    564 ;
    565 ; X64-LABEL: test_x86_avx_storeu_dq_256:
    566 ; X64:       ## %bb.0:
    567 ; X64-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
    568 ; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
    569 ; X64-NEXT:    vmovdqu %ymm0, (%rdi)
    570 ; X64-NEXT:    vzeroupper
    571 ; X64-NEXT:    retq
    572   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    573   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
    574   ret void
    575 }
    576 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
    577 
    578 define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
    579 ; X86-LABEL: mm256_max_epi8:
    580 ; X86:       ## %bb.0:
    581 ; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
    582 ; X86-NEXT:    retl
    583 ;
    584 ; X64-LABEL: mm256_max_epi8:
    585 ; X64:       ## %bb.0:
    586 ; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
    587 ; X64-NEXT:    retq
    588   %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
    589   ret <32 x i8> %res
    590 }
    591 declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
    592 
    593 define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
    594 ; X86-LABEL: mm256_max_epi16:
    595 ; X86:       ## %bb.0:
    596 ; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
    597 ; X86-NEXT:    retl
    598 ;
    599 ; X64-LABEL: mm256_max_epi16:
    600 ; X64:       ## %bb.0:
    601 ; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
    602 ; X64-NEXT:    retq
    603   %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
    604   ret <16 x i16> %res
    605 }
    606 declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
    607 
    608 define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
    609 ; X86-LABEL: mm256_max_epi32:
    610 ; X86:       ## %bb.0:
    611 ; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
    612 ; X86-NEXT:    retl
    613 ;
    614 ; X64-LABEL: mm256_max_epi32:
    615 ; X64:       ## %bb.0:
    616 ; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
    617 ; X64-NEXT:    retq
    618   %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
    619   ret <8 x i32> %res
    620 }
    621 declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
    622 
    623 define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
    624 ; X86-LABEL: mm256_max_epu8:
    625 ; X86:       ## %bb.0:
    626 ; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
    627 ; X86-NEXT:    retl
    628 ;
    629 ; X64-LABEL: mm256_max_epu8:
    630 ; X64:       ## %bb.0:
    631 ; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
    632 ; X64-NEXT:    retq
    633   %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
    634   ret <32 x i8> %res
    635 }
    636 declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
    637 
    638 define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
    639 ; X86-LABEL: mm256_max_epu16:
    640 ; X86:       ## %bb.0:
    641 ; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
    642 ; X86-NEXT:    retl
    643 ;
    644 ; X64-LABEL: mm256_max_epu16:
    645 ; X64:       ## %bb.0:
    646 ; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
    647 ; X64-NEXT:    retq
    648   %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
    649   ret <16 x i16> %res
    650 }
    651 declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
    652 
    653 define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
    654 ; X86-LABEL: mm256_max_epu32:
    655 ; X86:       ## %bb.0:
    656 ; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
    657 ; X86-NEXT:    retl
    658 ;
    659 ; X64-LABEL: mm256_max_epu32:
    660 ; X64:       ## %bb.0:
    661 ; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
    662 ; X64-NEXT:    retq
    663   %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
    664   ret <8 x i32> %res
    665 }
    666 declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
    667 
    668 define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
    669 ; X86-LABEL: mm256_min_epi8:
    670 ; X86:       ## %bb.0:
    671 ; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
    672 ; X86-NEXT:    retl
    673 ;
    674 ; X64-LABEL: mm256_min_epi8:
    675 ; X64:       ## %bb.0:
    676 ; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
    677 ; X64-NEXT:    retq
    678   %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
    679   ret <32 x i8> %res
    680 }
    681 declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
    682 
    683 define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
    684 ; X86-LABEL: mm256_min_epi16:
    685 ; X86:       ## %bb.0:
    686 ; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
    687 ; X86-NEXT:    retl
    688 ;
    689 ; X64-LABEL: mm256_min_epi16:
    690 ; X64:       ## %bb.0:
    691 ; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
    692 ; X64-NEXT:    retq
    693   %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
    694   ret <16 x i16> %res
    695 }
    696 declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
    697 
    698 define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
    699 ; X86-LABEL: mm256_min_epi32:
    700 ; X86:       ## %bb.0:
    701 ; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
    702 ; X86-NEXT:    retl
    703 ;
    704 ; X64-LABEL: mm256_min_epi32:
    705 ; X64:       ## %bb.0:
    706 ; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
    707 ; X64-NEXT:    retq
    708   %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
    709   ret <8 x i32> %res
    710 }
    711 declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
    712 
; Check that llvm.x86.avx2.pminu.b (unsigned byte min) lowers to a single
; vpminub instruction on both 32-bit and 64-bit targets.
define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_min_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
    727 
; Check that llvm.x86.avx2.pminu.w (unsigned word min) lowers to a single
; vpminuw instruction on both 32-bit and 64-bit targets.
define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_min_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
    742 
; Check that llvm.x86.avx2.pminu.d (unsigned dword min) lowers to a single
; vpminud instruction on both 32-bit and 64-bit targets.
define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: mm256_min_epu32:
; X86:       ## %bb.0:
; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_min_epu32:
; X64:       ## %bb.0:
; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
    757 
; Check that llvm.x86.avx2.pavg.b (unsigned byte rounding average) lowers to
; a single vpavgb instruction on both 32-bit and 64-bit targets.
define <32 x i8> @mm256_avg_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: mm256_avg_epu8:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu8:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
    772 
; Check that llvm.x86.avx2.pavg.w (unsigned word rounding average) lowers to
; a single vpavgw instruction on both 32-bit and 64-bit targets.
define <16 x i16> @mm256_avg_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: mm256_avg_epu16:
; X86:       ## %bb.0:
; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mm256_avg_epu16:
; X64:       ## %bb.0:
; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
    787 
; Check that llvm.x86.avx2.pabs.b (byte absolute value) lowers to a single
; vpabsb instruction on both 32-bit and 64-bit targets.
define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pabs_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsb %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsb %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
    802 
; Check that llvm.x86.avx2.pabs.d (dword absolute value) lowers to a single
; vpabsd instruction on both 32-bit and 64-bit targets.
define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
; X86-LABEL: test_x86_avx2_pabs_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsd %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsd %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
    817 
    818 
; Check that llvm.x86.avx2.pabs.w (word absolute value) lowers to a single
; vpabsw instruction on both 32-bit and 64-bit targets.
define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
; X86-LABEL: test_x86_avx2_pabs_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpabsw %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pabs_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpabsw %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
    833 
    834 
; Check that llvm.x86.avx2.vperm2i128 with immediate 1 (swap the two 128-bit
; halves of %a0; %a1 is unused for this immediate) is recognized as a shuffle
; and lowered to a single one-input vpermpd instead of vperm2i128.
define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_vperm2i128:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_vperm2i128:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
    849 
    850 
; Check that llvm.x86.avx2.pmulu.dq (unsigned 32x32->64 multiply of the even
; dword lanes) lowers to a single vpmuludq on both 32-bit and 64-bit targets.
define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmulu_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmulu_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
    865 
    866 
; Check that llvm.x86.avx2.pmul.dq (signed 32x32->64 multiply of the even
; dword lanes) lowers to a single vpmuldq on both 32-bit and 64-bit targets.
define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pmul_dq:
; X86:       ## %bb.0:
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_avx2_pmul_dq:
; X64:       ## %bb.0:
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
    881