; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)

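; Cover merge-masked, zero-masked, and unmasked uses of the permvar.qi.128
; intrinsic (vpermb); the vpaddb chain keeps all three results live.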
define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
; X86-NEXT:    vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
; X86-NEXT:    vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
; X64-NEXT:    vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
; X64-NEXT:    vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2
  %4 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
  %7 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1)
  %res3 = add <16 x i8> %3, %6
  %res4 = add <16 x i8> %res3, %7
  ret <16 x i8> %res4
}

declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)

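; 256-bit variant of the permvar test above, using an i32 mask for the 32 byte lanes.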
define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
; X86-NEXT:    vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
; X86-NEXT:    vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
; X86-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
; X64-NEXT:    vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
; X64-NEXT:    vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
; X64-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2
  %4 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
  %7 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1)
  %res3 = add <32 x i8> %3, %6
  %res4 = add <32 x i8> %res3, %7
  ret <32 x i8> %res4
}

declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

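; The legacy mask.pmultishift.qb.128 intrinsic carries its passthru and mask as
; operands: cover a merge mask, a zeroed passthru, and an all-ones (unmasked) mask.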
define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9]
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1]
; X86-NEXT:    vpaddb %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0]
; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xd9]
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
; X64-NEXT:    vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  %res3 = add <16 x i8> %res, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}

declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

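; 256-bit variant of the pmultishift test above.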
define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9]
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1]
; X86-NEXT:    vpaddb %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
; X86-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9]
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1]
; X64-NEXT:    vpaddb %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0]
; X64-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  %res3 = add <32 x i8> %res, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}

declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)

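; vpermi2var merges its result into the index operand %x1, so the masked forms
; lower to vpermi2b while the unmasked form can use vpermt2b on a copy of %x0.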
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
; X86-NEXT:    vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2]
; X64-NEXT:    vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
  %4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
  %7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2)
  %res3 = add <16 x i8> %3, %6
  %res4 = add <16 x i8> %res3, %7
  ret <16 x i8> %res4
}

declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)

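; 256-bit variant of the vpermi2var test above.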
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
; X86-NEXT:    vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2]
; X64-NEXT:    vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
  %4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
  %7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2)
  %res3 = add <32 x i8> %3, %6
  %res4 = add <32 x i8> %res3, %7
  ret <32 x i8> %res4
}

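; vpermt2var is expressed as vpermi2var with the first two operands swapped and
; the result merged into the data operand %x1, so these calls lower to vpermt2b.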
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT:    vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
; X86-NEXT:    vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT:    vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2]
; X64-NEXT:    vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3]
; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1
  %4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> zeroinitializer, <16 x i8> %x0, <16 x i8> %x2)
  %5 = bitcast i16 %x3 to <16 x i1>
  %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer
  %7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
  %res3 = add <16 x i8> %3, %6
  %res4 = add <16 x i8> %res3, %7
  ret <16 x i8> %res4
}

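; 256-bit variant of the vpermt2var test above.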
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X86-NEXT:    vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
; X86-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X86-NEXT:    vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
; X86-NEXT:    vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
; X64-NEXT:    vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
; X64-NEXT:    vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4]
; X64-NEXT:    vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2]
; X64-NEXT:    vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3]
; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1
  %4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> zeroinitializer, <32 x i8> %x0, <32 x i8> %x2)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer
  %7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
  %res3 = add <32 x i8> %3, %6
  %res4 = add <32 x i8> %res3, %7
  ret <32 x i8> %res4
}

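; The zero-masked vpermt2var case needs no passthru copy and folds to a single
; vpermi2b {z}.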
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
  ret <16 x i8> %3
}

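; 256-bit variant of the zero-masked test above.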
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
  ret <32 x i8> %3
}