; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT:    retl
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_mpsadbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psll_dq_bs:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psrl_dq_bs:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psll_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14],zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_psrl_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],zero
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 8) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_vextracti128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx2_vinserti128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_sd_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx2_vbroadcast_ss_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastw_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastw_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retl
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_pbroadcastq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxbd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovsxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovsxwq %xmm0, %ymm0
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxbw:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    retl
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxdq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxwd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    retl
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovzxwq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK-NEXT:    retl
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard to
; test in AVX mode, since 256-bit integer instructions are not available there.
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; The add operation forces the integer execution domain, so the unaligned store
  ; stays vmovdqu (an illustrative, unchecked variant without the forced domain
  ; follows this test).
; CHECK-LABEL: test_x86_avx_storeu_dq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    vpaddb LCPI33_0, %ymm0, %ymm0
; CHECK-NEXT:    vmovdqu %ymm0, (%eax)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retl
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
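
; A minimal sketch (not part of the original test) of the same store without the
; domain-forcing add: with no 256-bit integer op feeding the value, the execution
; dependency fix pass is free to choose a floating-point move for the unaligned
; store, which is the situation the comment above describes. The function name is
; hypothetical and there are no CHECK lines, so this is illustration only.
define void @storeu_dq_256_no_forced_domain(i8* %a0, <32 x i8> %a1) {
  ; Store the vector directly; nothing here pins %a1 to the integer domain.
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
  ret void
}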

define <32 x i8> @mm256_max_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_max_epi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_max_epi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_max_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_max_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_max_epu8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_max_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_max_epu16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_max_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_max_epu32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epi8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_min_epi8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epi16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_min_epi16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epi32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_min_epi32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminsd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone

define <32 x i8> @mm256_min_epu8(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: mm256_min_epu8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @mm256_min_epu16(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: mm256_min_epu16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @mm256_min_epu32(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: mm256_min_epu32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpminud %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retl
;
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone