; X86 AVX intrinsic lowering test (code-viewer navigation header removed).
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+pclmul,+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL

      7 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
      8 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
      9 ; CHECK:       # %bb.0:
     10 ; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd0,0xc1]
     11 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     12   %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
     13   ret <4 x double> %res
     14 }
     15 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
     16 
     17 
     18 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
     19 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
     20 ; CHECK:       # %bb.0:
     21 ; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0xd0,0xc1]
     22 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     23   %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
     24   ret <8 x float> %res
     25 }
     26 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
     27 
     28 
     29 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
     30 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
     31 ; CHECK:       # %bb.0:
     32 ; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4b,0xc1,0x20]
     33 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     34   %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
     35   ret <4 x double> %res
     36 }
     37 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
     38 
     39 
     40 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
     41 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
     42 ; CHECK:       # %bb.0:
     43 ; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4a,0xc1,0x20]
     44 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     45   %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
     46   ret <8 x float> %res
     47 }
     48 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
     49 
     50 
     51 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
     52 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
     53 ; CHECK:       # %bb.0:
     54 ; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xc2,0xc1,0x07]
     55 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     56   %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
     57   ret <4 x double> %res
     58 }
     59 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
     60 
     61 
     62 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
     63 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
     64 ; CHECK:       # %bb.0:
     65 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x07]
     66 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     67   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
     68   ret <8 x float> %res
     69 }
     70 
     71 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
     72 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
     73 ; CHECK:       # %bb.0:
     74 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x00]
     75 ; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x01]
     76 ; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x02]
     77 ; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x03]
     78 ; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x04]
     79 ; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x05]
     80 ; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x06]
     81 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x07]
     82 ; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x08]
     83 ; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x09]
     84 ; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0a]
     85 ; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0b]
     86 ; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0c]
     87 ; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0d]
     88 ; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0e]
     89 ; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x0f]
     90 ; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x10]
     91 ; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x11]
     92 ; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x12]
     93 ; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x13]
     94 ; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x14]
     95 ; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x15]
     96 ; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x16]
     97 ; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x17]
     98 ; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x18]
     99 ; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x19]
    100 ; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1a]
    101 ; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1b]
    102 ; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1c]
    103 ; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1d]
    104 ; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1 # encoding: [0xc5,0xfc,0xc2,0xc9,0x1e]
    105 ; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0xc2,0xc1,0x1f]
    106 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    107   %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
    108   %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
    109   %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
    110   %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
    111   %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
    112   %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
    113   %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
    114   %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
    115   %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
    116   %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
    117   %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
    118   %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
    119   %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
    120   %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
    121   %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
    122   %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
    123   %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
    124   %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
    125   %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
    126   %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
    127   %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
    128   %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
    129   %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
    130   %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
    131   %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
    132   %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
    133   %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
    134   %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
    135   %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
    136   %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
    137   %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
    138   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
    139   ret <8 x float> %res
    140 }
    141 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
    142 
    143 
    144 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
    145 ; AVX-LABEL: test_x86_avx_cvt_pd2_ps_256:
    146 ; AVX:       # %bb.0:
    147 ; AVX-NEXT:    vcvtpd2ps %ymm0, %xmm0 # encoding: [0xc5,0xfd,0x5a,0xc0]
    148 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    149 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    150 ;
    151 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256:
    152 ; AVX512VL:       # %bb.0:
    153 ; AVX512VL-NEXT:    vcvtpd2ps %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xc0]
    154 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    155 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    156   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
    157   ret <4 x float> %res
    158 }
    159 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
    160 
    161 
    162 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
    163 ; AVX-LABEL: test_x86_avx_cvt_pd2dq_256:
    164 ; AVX:       # %bb.0:
    165 ; AVX-NEXT:    vcvtpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xff,0xe6,0xc0]
    166 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    167 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    168 ;
    169 ; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256:
    170 ; AVX512VL:       # %bb.0:
    171 ; AVX512VL-NEXT:    vcvtpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xc0]
    172 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    173 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    174   %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
    175   ret <4 x i32> %res
    176 }
    177 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
    178 
    179 
    180 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
    181 ; AVX-LABEL: test_x86_avx_cvt_ps2dq_256:
    182 ; AVX:       # %bb.0:
    183 ; AVX-NEXT:    vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0]
    184 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    185 ;
    186 ; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256:
    187 ; AVX512VL:       # %bb.0:
    188 ; AVX512VL-NEXT:    vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0]
    189 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    190   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
    191   ret <8 x i32> %res
    192 }
    193 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
    194 
    195 
    196 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
    197 ; AVX-LABEL: test_x86_avx_cvtt_pd2dq_256:
    198 ; AVX:       # %bb.0:
    199 ; AVX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # encoding: [0xc5,0xfd,0xe6,0xc0]
    200 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    201 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    202 ;
    203 ; AVX512VL-LABEL: test_x86_avx_cvtt_pd2dq_256:
    204 ; AVX512VL:       # %bb.0:
    205 ; AVX512VL-NEXT:    vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
    206 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    207 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    208   %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
    209   ret <4 x i32> %res
    210 }
    211 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
    212 
    213 
    214 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
    215 ; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256:
    216 ; AVX:       # %bb.0:
    217 ; AVX-NEXT:    vcvttps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfe,0x5b,0xc0]
    218 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    219 ;
    220 ; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256:
    221 ; AVX512VL:       # %bb.0:
    222 ; AVX512VL-NEXT:    vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
    223 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    224   %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
    225   ret <8 x i32> %res
    226 }
    227 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
    228 
    229 
    230 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
    231 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
    232 ; CHECK:       # %bb.0:
    233 ; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
    234 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    235   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
    236   ret <8 x float> %res
    237 }
    238 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
    239 
    240 
    241 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
    242 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
    243 ; CHECK:       # %bb.0:
    244 ; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7c,0xc1]
    245 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    246   %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
    247   ret <4 x double> %res
    248 }
    249 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
    250 
    251 
    252 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
    253 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
    254 ; CHECK:       # %bb.0:
    255 ; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7c,0xc1]
    256 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    257   %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
    258   ret <8 x float> %res
    259 }
    260 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
    261 
    262 
    263 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
    264 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
    265 ; CHECK:       # %bb.0:
    266 ; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x7d,0xc1]
    267 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    268   %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
    269   ret <4 x double> %res
    270 }
    271 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
    272 
    273 
    274 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
    275 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
    276 ; CHECK:       # %bb.0:
    277 ; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xff,0x7d,0xc1]
    278 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    279   %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
    280   ret <8 x float> %res
    281 }
    282 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
    283 
    284 
    285 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
    286 ; X86-LABEL: test_x86_avx_ldu_dq_256:
    287 ; X86:       # %bb.0:
    288 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    289 ; X86-NEXT:    vlddqu (%eax), %ymm0 # encoding: [0xc5,0xff,0xf0,0x00]
    290 ; X86-NEXT:    retl # encoding: [0xc3]
    291 ;
    292 ; X64-LABEL: test_x86_avx_ldu_dq_256:
    293 ; X64:       # %bb.0:
    294 ; X64-NEXT:    vlddqu (%rdi), %ymm0 # encoding: [0xc5,0xff,0xf0,0x07]
    295 ; X64-NEXT:    retq # encoding: [0xc3]
    296   %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
    297   ret <32 x i8> %res
    298 }
    299 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
    300 
    301 
    302 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
    303 ; X86-LABEL: test_x86_avx_maskload_pd:
    304 ; X86:       # %bb.0:
    305 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    306 ; X86-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x00]
    307 ; X86-NEXT:    retl # encoding: [0xc3]
    308 ;
    309 ; X64-LABEL: test_x86_avx_maskload_pd:
    310 ; X64:       # %bb.0:
    311 ; X64-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2d,0x07]
    312 ; X64-NEXT:    retq # encoding: [0xc3]
    313   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
    314   ret <2 x double> %res
    315 }
    316 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
    317 
    318 
    319 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
    320 ; X86-LABEL: test_x86_avx_maskload_pd_256:
    321 ; X86:       # %bb.0:
    322 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    323 ; X86-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x00]
    324 ; X86-NEXT:    retl # encoding: [0xc3]
    325 ;
    326 ; X64-LABEL: test_x86_avx_maskload_pd_256:
    327 ; X64:       # %bb.0:
    328 ; X64-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2d,0x07]
    329 ; X64-NEXT:    retq # encoding: [0xc3]
    330   %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
    331   ret <4 x double> %res
    332 }
    333 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
    334 
    335 
    336 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
    337 ; X86-LABEL: test_x86_avx_maskload_ps:
    338 ; X86:       # %bb.0:
    339 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    340 ; X86-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x00]
    341 ; X86-NEXT:    retl # encoding: [0xc3]
    342 ;
    343 ; X64-LABEL: test_x86_avx_maskload_ps:
    344 ; X64:       # %bb.0:
    345 ; X64-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x2c,0x07]
    346 ; X64-NEXT:    retq # encoding: [0xc3]
    347   %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
    348   ret <4 x float> %res
    349 }
    350 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
    351 
    352 
    353 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
    354 ; X86-LABEL: test_x86_avx_maskload_ps_256:
    355 ; X86:       # %bb.0:
    356 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    357 ; X86-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x00]
    358 ; X86-NEXT:    retl # encoding: [0xc3]
    359 ;
    360 ; X64-LABEL: test_x86_avx_maskload_ps_256:
    361 ; X64:       # %bb.0:
    362 ; X64-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2c,0x07]
    363 ; X64-NEXT:    retq # encoding: [0xc3]
    364   %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
    365   ret <8 x float> %res
    366 }
    367 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
    368 
    369 
    370 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
    371 ; X86-LABEL: test_x86_avx_maskstore_pd:
    372 ; X86:       # %bb.0:
    373 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    374 ; X86-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2f,0x08]
    375 ; X86-NEXT:    retl # encoding: [0xc3]
    376 ;
    377 ; X64-LABEL: test_x86_avx_maskstore_pd:
    378 ; X64:       # %bb.0:
    379 ; X64-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2f,0x0f]
    380 ; X64-NEXT:    retq # encoding: [0xc3]
    381   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
    382   ret void
    383 }
    384 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
    385 
    386 
    387 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
    388 ; X86-LABEL: test_x86_avx_maskstore_pd_256:
    389 ; X86:       # %bb.0:
    390 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    391 ; X86-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2f,0x08]
    392 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    393 ; X86-NEXT:    retl # encoding: [0xc3]
    394 ;
    395 ; X64-LABEL: test_x86_avx_maskstore_pd_256:
    396 ; X64:       # %bb.0:
    397 ; X64-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2f,0x0f]
    398 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    399 ; X64-NEXT:    retq # encoding: [0xc3]
    400   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
    401   ret void
    402 }
    403 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
    404 
    405 
    406 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
    407 ; X86-LABEL: test_x86_avx_maskstore_ps:
    408 ; X86:       # %bb.0:
    409 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    410 ; X86-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x2e,0x08]
    411 ; X86-NEXT:    retl # encoding: [0xc3]
    412 ;
    413 ; X64-LABEL: test_x86_avx_maskstore_ps:
    414 ; X64:       # %bb.0:
    415 ; X64-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x2e,0x0f]
    416 ; X64-NEXT:    retq # encoding: [0xc3]
    417   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
    418   ret void
    419 }
    420 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
    421 
    422 
    423 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
    424 ; X86-LABEL: test_x86_avx_maskstore_ps_256:
    425 ; X86:       # %bb.0:
    426 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    427 ; X86-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x2e,0x08]
    428 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    429 ; X86-NEXT:    retl # encoding: [0xc3]
    430 ;
    431 ; X64-LABEL: test_x86_avx_maskstore_ps_256:
    432 ; X64:       # %bb.0:
    433 ; X64-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x2e,0x0f]
    434 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    435 ; X64-NEXT:    retq # encoding: [0xc3]
    436   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
    437   ret void
    438 }
    439 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
    440 
    441 
    442 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
    443 ; AVX-LABEL: test_x86_avx_max_pd_256:
    444 ; AVX:       # %bb.0:
    445 ; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5f,0xc1]
    446 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    447 ;
    448 ; AVX512VL-LABEL: test_x86_avx_max_pd_256:
    449 ; AVX512VL:       # %bb.0:
    450 ; AVX512VL-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5f,0xc1]
    451 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    452   %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
    453   ret <4 x double> %res
    454 }
    455 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
    456 
    457 
    458 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
    459 ; AVX-LABEL: test_x86_avx_max_ps_256:
    460 ; AVX:       # %bb.0:
    461 ; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5f,0xc1]
    462 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    463 ;
    464 ; AVX512VL-LABEL: test_x86_avx_max_ps_256:
    465 ; AVX512VL:       # %bb.0:
    466 ; AVX512VL-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
    467 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    468   %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
    469   ret <8 x float> %res
    470 }
    471 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
    472 
    473 
    474 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
    475 ; AVX-LABEL: test_x86_avx_min_pd_256:
    476 ; AVX:       # %bb.0:
    477 ; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5d,0xc1]
    478 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    479 ;
    480 ; AVX512VL-LABEL: test_x86_avx_min_pd_256:
    481 ; AVX512VL:       # %bb.0:
    482 ; AVX512VL-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5d,0xc1]
    483 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    484   %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
    485   ret <4 x double> %res
    486 }
    487 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
    488 
    489 
    490 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
    491 ; AVX-LABEL: test_x86_avx_min_ps_256:
    492 ; AVX:       # %bb.0:
    493 ; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5d,0xc1]
    494 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    495 ;
    496 ; AVX512VL-LABEL: test_x86_avx_min_ps_256:
    497 ; AVX512VL:       # %bb.0:
    498 ; AVX512VL-NEXT:    vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
    499 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    500   %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
    501   ret <8 x float> %res
    502 }
    503 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
    504 
    505 
    506 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
    507 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
    508 ; CHECK:       # %bb.0:
    509 ; CHECK-NEXT:    vmovmskpd %ymm0, %eax # encoding: [0xc5,0xfd,0x50,0xc0]
    510 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    511 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    512   %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
    513   ret i32 %res
    514 }
    515 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
    516 
    517 
    518 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
    519 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
    520 ; CHECK:       # %bb.0:
    521 ; CHECK-NEXT:    vmovmskps %ymm0, %eax # encoding: [0xc5,0xfc,0x50,0xc0]
    522 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    523 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    524   %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
    525   ret i32 %res
    526 }
    527 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
    528 
    529 
; 256-bit VPTEST intrinsics: each lowers to vptest plus a flag-reading setcc
; into a pre-zeroed %eax. ptestc reads CF (setb).
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone


; ptestnzc reads "not zero and not carry" (seta).
define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestnzc_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone


; ptestz reads ZF (sete).
define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestz_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vptest %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
    570 
    571 
; VRCPPS (approximate reciprocal) keeps its VEX form on all run lines; there
; is no AVX512VL split here because the lowering is identical.
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rcp_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x53,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
    581 
    582 
; VROUNDPD with a constant rounding-control immediate (i32 7 -> $7 in the
; encoded instruction).
define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_round_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_round_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vroundpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x09,0xc0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone


; VROUNDPS, same immediate handling as the pd variant above.
define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
; AVX-LABEL: test_x86_avx_round_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vroundps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_round_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vroundps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x08,0xc0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
    613 
    614 
; VRSQRTPS (approximate reciprocal square root), single shared CHECK block.
define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x52,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
    624 
; VPERMILPD with a variable (register) selector, 128-bit form.
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone


; VPERMILPD with a variable selector, 256-bit form.
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone

; A constant selector <2,0,0,2> must fold to the immediate form (vpermilpd $9)
; with the shuffle mask ymm0[1,0,2,3] shown in the asm comment.
define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
; AVX-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd $9, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
; AVX-NEXT:    # ymm0 = ymm0[1,0,2,3]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilpd $9, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x09]
; AVX512VL-NEXT:    # ymm0 = ymm0[1,0,2,3]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
    671 
; VPERMILPS with a variable selector, 128-bit form.
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_ps:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
; The selector load must fold into vpermilps' memory operand on both 32-bit
; (via the stack-loaded pointer in %eax) and 64-bit (%rdi) targets.
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
; X86-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpermilps (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpermilps (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0c,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_load:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x i32>, <4 x i32>* %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone


; VPERMILPS with a variable selector, 256-bit form.
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
; AVX-LABEL: test_x86_avx_vpermilvar_ps_256:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_ps_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
    728 
    729 
; VTESTPD/VTESTPS "carry" intrinsics: vtestp{d,s} then setb into a pre-zeroed
; %eax. The 256-bit variants additionally need vzeroupper before ret.
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
    782 
    783 
; VTEST "not zero, not carry" intrinsics: same pattern as vtestc but the
; result is read with seta.
define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT:    seta %al # encoding: [0x0f,0x97,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
    836 
    837 
; VTEST "zero" intrinsics: same pattern as vtestc but the result is read
; with sete.
define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestpd %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT:    vtestps %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
    890 
    891 
; llvm.x86.avx.vzeroall lowers directly to the vzeroall instruction.
define void @test_x86_avx_vzeroall() {
; CHECK-LABEL: test_x86_avx_vzeroall:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vzeroall # encoding: [0xc5,0xfc,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


; llvm.x86.avx.vzeroupper lowers directly to the vzeroupper instruction.
define void @test_x86_avx_vzeroupper() {
; CHECK-LABEL: test_x86_avx_vzeroupper:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind
    912 
; Non-temporal integer store. The "add <1,1>" is lowered as vpcmpeqd
; (materialize all-ones) + vpsubq (subtracting -1), and the widened <4 x i64>
; value is stored with vmovntdq %ymm0.
define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; X86-AVX-LABEL: movnt_dq:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX-NEXT:    vmovntdq %ymm0, (%eax) # encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_dq:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X86-AVX512VL-NEXT:    vmovntdq %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_dq:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX-NEXT:    vmovntdq %ymm0, (%rdi) # encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_dq:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512VL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; X64-AVX512VL-NEXT:    vmovntdq %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe7,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
    953 
; Non-temporal float store: the intrinsic lowers straight to vmovntps.
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
; X86-AVX-LABEL: movnt_ps:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovntps %ymm0, (%eax) # encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_ps:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovntps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_ps:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovntps %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_ps:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovntps %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x2b,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
    984 
; Non-temporal double store: the fadd of zero pins the value to the FP domain
; (vxorpd/vaddpd), then vmovntpd stores it.
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; add operation forces the execution domain.
; X86-AVX-LABEL: movnt_pd:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX-NEXT:    vmovntpd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: movnt_pd:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-AVX512VL-NEXT:    vmovntpd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x00]
; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: movnt_pd:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX-NEXT:    vmovntpd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: movnt_pd:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-AVX512VL-NEXT:    vmovntpd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x2b,0x07]
; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
   1025 
   1026 
; Check for pclmulqdq
; Carry-less multiply with immediate 0 (low qword x low qword), requires the
; +pclmul attribute from the RUN lines.
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_pclmulqdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x44,0xc1,0x00]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   1037