Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,-avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,-avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512DQ
      6 
      7 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
        ; Test: llvm.ctlz on <8 x i64> with the i1 flag argument set to 0.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD / AVX512CDBW - a single vplzcntq on zmm0.
        ;   AVX512BW - shift/or smear (vpsrlq + vporq by 1,2,4,8,16,32), then a
        ;              nibble-table sequence on zmm (vpshufb) summed with vpsadbw.
        ;   AVX512DQ - same smear, but the table sequence runs on two ymm halves
        ;              (vextracti64x4 ... vinserti64x4).
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
      8 ; AVX512CD-LABEL: testv8i64:
      9 ; AVX512CD:       # %bb.0:
     10 ; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
     11 ; AVX512CD-NEXT:    retq
     12 ;
     13 ; AVX512CDBW-LABEL: testv8i64:
     14 ; AVX512CDBW:       # %bb.0:
     15 ; AVX512CDBW-NEXT:    vplzcntq %zmm0, %zmm0
     16 ; AVX512CDBW-NEXT:    retq
     17 ;
     18 ; AVX512BW-LABEL: testv8i64:
     19 ; AVX512BW:       # %bb.0:
     20 ; AVX512BW-NEXT:    vpsrlq $1, %zmm0, %zmm1
     21 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     22 ; AVX512BW-NEXT:    vpsrlq $2, %zmm0, %zmm1
     23 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     24 ; AVX512BW-NEXT:    vpsrlq $4, %zmm0, %zmm1
     25 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     26 ; AVX512BW-NEXT:    vpsrlq $8, %zmm0, %zmm1
     27 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     28 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
     29 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     30 ; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
     31 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     32 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     33 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
     34 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     35 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
     36 ; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
     37 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
     38 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
     39 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
     40 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
     41 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     42 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
     43 ; AVX512BW-NEXT:    retq
     44 ;
     45 ; AVX512DQ-LABEL: testv8i64:
     46 ; AVX512DQ:       # %bb.0:
     47 ; AVX512DQ-NEXT:    vpsrlq $1, %zmm0, %zmm1
     48 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     49 ; AVX512DQ-NEXT:    vpsrlq $2, %zmm0, %zmm1
     50 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     51 ; AVX512DQ-NEXT:    vpsrlq $4, %zmm0, %zmm1
     52 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     53 ; AVX512DQ-NEXT:    vpsrlq $8, %zmm0, %zmm1
     54 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     55 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
     56 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     57 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
     58 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
     59 ; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
     60 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
     61 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     62 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
     63 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
     64 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
     65 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
     66 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
     67 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
     68 ; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
     69 ; AVX512DQ-NEXT:    vpxor %xmm3, %xmm3, %xmm3
     70 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
     71 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm5
     72 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
     73 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
     74 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
     75 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
     76 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
     77 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
     78 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
     79 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 0 is presumably the "zero input is undefined/poison" flag
        ; turned off, i.e. a zero lane has a defined result - confirm in the LangRef.
     80   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
     81   ret <8 x i64> %out
     82 }
     83 
     84 define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
        ; Test: llvm.ctlz on <8 x i64> with the i1 flag argument set to -1.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD / AVX512CDBW - a single vplzcntq on zmm0.
        ;   AVX512BW - shift/or smear (vpsrlq + vporq), then a nibble-table
        ;              sequence on zmm (vpshufb) summed with vpsadbw.
        ;   AVX512DQ - same smear, with the table sequence split over two ymm
        ;              halves (vextracti64x4 ... vinserti64x4).
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
     85 ; AVX512CD-LABEL: testv8i64u:
     86 ; AVX512CD:       # %bb.0:
     87 ; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
     88 ; AVX512CD-NEXT:    retq
     89 ;
     90 ; AVX512CDBW-LABEL: testv8i64u:
     91 ; AVX512CDBW:       # %bb.0:
     92 ; AVX512CDBW-NEXT:    vplzcntq %zmm0, %zmm0
     93 ; AVX512CDBW-NEXT:    retq
     94 ;
     95 ; AVX512BW-LABEL: testv8i64u:
     96 ; AVX512BW:       # %bb.0:
     97 ; AVX512BW-NEXT:    vpsrlq $1, %zmm0, %zmm1
     98 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
     99 ; AVX512BW-NEXT:    vpsrlq $2, %zmm0, %zmm1
    100 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    101 ; AVX512BW-NEXT:    vpsrlq $4, %zmm0, %zmm1
    102 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    103 ; AVX512BW-NEXT:    vpsrlq $8, %zmm0, %zmm1
    104 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    105 ; AVX512BW-NEXT:    vpsrlq $16, %zmm0, %zmm1
    106 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    107 ; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm1
    108 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    109 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    110 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
    111 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    112 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    113 ; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    114 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    115 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    116 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    117 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    118 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    119 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
    120 ; AVX512BW-NEXT:    retq
    121 ;
    122 ; AVX512DQ-LABEL: testv8i64u:
    123 ; AVX512DQ:       # %bb.0:
    124 ; AVX512DQ-NEXT:    vpsrlq $1, %zmm0, %zmm1
    125 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    126 ; AVX512DQ-NEXT:    vpsrlq $2, %zmm0, %zmm1
    127 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    128 ; AVX512DQ-NEXT:    vpsrlq $4, %zmm0, %zmm1
    129 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    130 ; AVX512DQ-NEXT:    vpsrlq $8, %zmm0, %zmm1
    131 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    132 ; AVX512DQ-NEXT:    vpsrlq $16, %zmm0, %zmm1
    133 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    134 ; AVX512DQ-NEXT:    vpsrlq $32, %zmm0, %zmm1
    135 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    136 ; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    137 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    138 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    139 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    140 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    141 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    142 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
    143 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    144 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    145 ; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    146 ; AVX512DQ-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    147 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
    148 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm5
    149 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
    150 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
    151 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    152 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    153 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
    154 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
    155 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    156 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 -1 is the all-ones i1 (true); presumably this marks a zero
        ; input as undefined/poison (the "u" in the name) - confirm in the LangRef.
    157   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
    158   ret <8 x i64> %out
    159 }
    160 
    161 define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
        ; Test: llvm.ctlz on <16 x i32> with the i1 flag argument set to 0.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD / AVX512CDBW - a single vplzcntd on zmm0.
        ;   AVX512BW - shift/or smear (vpsrld + vporq by 1,2,4,8,16), nibble-table
        ;              vpshufb on zmm, then per-dword reduction: vpunpckhdq /
        ;              vpunpckldq with zero, vpsadbw, and vpackuswb.
        ;   AVX512DQ - same smear, with the table and reduction steps done on two
        ;              ymm halves (vextracti64x4 ... vinserti64x4).
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
    162 ; AVX512CD-LABEL: testv16i32:
    163 ; AVX512CD:       # %bb.0:
    164 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    165 ; AVX512CD-NEXT:    retq
    166 ;
    167 ; AVX512CDBW-LABEL: testv16i32:
    168 ; AVX512CDBW:       # %bb.0:
    169 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    170 ; AVX512CDBW-NEXT:    retq
    171 ;
    172 ; AVX512BW-LABEL: testv16i32:
    173 ; AVX512BW:       # %bb.0:
    174 ; AVX512BW-NEXT:    vpsrld $1, %zmm0, %zmm1
    175 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    176 ; AVX512BW-NEXT:    vpsrld $2, %zmm0, %zmm1
    177 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    178 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
    179 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    180 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
    181 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    182 ; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
    183 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    184 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    185 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
    186 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    187 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    188 ; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    189 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    190 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    191 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    192 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    193 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    194 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
    195 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
    196 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
    197 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
    198 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    199 ; AVX512BW-NEXT:    retq
    200 ;
    201 ; AVX512DQ-LABEL: testv16i32:
    202 ; AVX512DQ:       # %bb.0:
    203 ; AVX512DQ-NEXT:    vpsrld $1, %zmm0, %zmm1
    204 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    205 ; AVX512DQ-NEXT:    vpsrld $2, %zmm0, %zmm1
    206 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    207 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1
    208 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    209 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
    210 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    211 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
    212 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    213 ; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    214 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    215 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    216 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    217 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    218 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    219 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
    220 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    221 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    222 ; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    223 ; AVX512DQ-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    224 ; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
    225 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm5, %ymm5
    226 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
    227 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
    228 ; AVX512DQ-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
    229 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm5
    230 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
    231 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
    232 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    233 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    234 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
    235 ; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
    236 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm2, %ymm2
    237 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
    238 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
    239 ; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
    240 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    241 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 0 is presumably the "zero input is undefined/poison" flag
        ; turned off, i.e. a zero lane has a defined result - confirm in the LangRef.
    242   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
    243   ret <16 x i32> %out
    244 }
    245 
    246 define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
        ; Test: llvm.ctlz on <16 x i32> with the i1 flag argument set to -1.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD / AVX512CDBW - a single vplzcntd on zmm0.
        ;   AVX512BW - shift/or smear (vpsrld + vporq), nibble-table vpshufb on zmm,
        ;              then per-dword reduction: vpunpckhdq / vpunpckldq with zero,
        ;              vpsadbw, and vpackuswb.
        ;   AVX512DQ - same smear, with the table and reduction steps done on two
        ;              ymm halves (vextracti64x4 ... vinserti64x4).
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
    247 ; AVX512CD-LABEL: testv16i32u:
    248 ; AVX512CD:       # %bb.0:
    249 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    250 ; AVX512CD-NEXT:    retq
    251 ;
    252 ; AVX512CDBW-LABEL: testv16i32u:
    253 ; AVX512CDBW:       # %bb.0:
    254 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    255 ; AVX512CDBW-NEXT:    retq
    256 ;
    257 ; AVX512BW-LABEL: testv16i32u:
    258 ; AVX512BW:       # %bb.0:
    259 ; AVX512BW-NEXT:    vpsrld $1, %zmm0, %zmm1
    260 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    261 ; AVX512BW-NEXT:    vpsrld $2, %zmm0, %zmm1
    262 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    263 ; AVX512BW-NEXT:    vpsrld $4, %zmm0, %zmm1
    264 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    265 ; AVX512BW-NEXT:    vpsrld $8, %zmm0, %zmm1
    266 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    267 ; AVX512BW-NEXT:    vpsrld $16, %zmm0, %zmm1
    268 ; AVX512BW-NEXT:    vporq %zmm1, %zmm0, %zmm0
    269 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    270 ; AVX512BW-NEXT:    vpandnq %zmm1, %zmm0, %zmm2
    271 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    272 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    273 ; AVX512BW-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    274 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    275 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    276 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    277 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
    278 ; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    279 ; AVX512BW-NEXT:    vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
    280 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm2, %zmm2
    281 ; AVX512BW-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
    282 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
    283 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
    284 ; AVX512BW-NEXT:    retq
    285 ;
    286 ; AVX512DQ-LABEL: testv16i32u:
    287 ; AVX512DQ:       # %bb.0:
    288 ; AVX512DQ-NEXT:    vpsrld $1, %zmm0, %zmm1
    289 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    290 ; AVX512DQ-NEXT:    vpsrld $2, %zmm0, %zmm1
    291 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    292 ; AVX512DQ-NEXT:    vpsrld $4, %zmm0, %zmm1
    293 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    294 ; AVX512DQ-NEXT:    vpsrld $8, %zmm0, %zmm1
    295 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    296 ; AVX512DQ-NEXT:    vpsrld $16, %zmm0, %zmm1
    297 ; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
    298 ; AVX512DQ-NEXT:    vpternlogq $15, %zmm0, %zmm0, %zmm0
    299 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    300 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    301 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    302 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
    303 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    304 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
    305 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    306 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    307 ; AVX512DQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
    308 ; AVX512DQ-NEXT:    vpxor %xmm3, %xmm3, %xmm3
    309 ; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
    310 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm5, %ymm5
    311 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
    312 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm1, %ymm1
    313 ; AVX512DQ-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
    314 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm5
    315 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
    316 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
    317 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    318 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    319 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
    320 ; AVX512DQ-NEXT:    vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
    321 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm2, %ymm2
    322 ; AVX512DQ-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
    323 ; AVX512DQ-NEXT:    vpsadbw %ymm3, %ymm0, %ymm0
    324 ; AVX512DQ-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
    325 ; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    326 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 -1 is the all-ones i1 (true); presumably this marks a zero
        ; input as undefined/poison (the "u" in the name) - confirm in the LangRef.
    327   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
    328   ret <16 x i32> %out
    329 }
    330 
    331 define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
        ; Test: llvm.ctlz on <32 x i16> with the i1 flag argument set to 0.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD   - for each of ymm0 and ymm1: widen words to dwords (vpmovzxwd),
        ;                vplzcntd, narrow back (vpmovdw), then vpsubw by a vector of 16s.
        ;   AVX512CDBW - same widen/vplzcntd/narrow/subtract steps, with the two halves
        ;                taken from zmm0 (vextracti64x4) and rejoined (vinserti64x4).
        ;   AVX512BW   - vpshufb lookups into a [4,3,2,2,1,1,1,1,0,...] table on zmm,
        ;                combined using vptestnmb/vpmovm2b masks and word shifts.
        ;   AVX512DQ   - the same table lookups on ymm0 and ymm1 separately, using
        ;                vpcmpeqb against zero for the masks.
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
    332 ; AVX512CD-LABEL: testv32i16:
    333 ; AVX512CD:       # %bb.0:
    334 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    335 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    336 ; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
    337 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    338 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
    339 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
    340 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    341 ; AVX512CD-NEXT:    vpmovdw %zmm1, %ymm1
    342 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
    343 ; AVX512CD-NEXT:    retq
    344 ;
    345 ; AVX512CDBW-LABEL: testv32i16:
    346 ; AVX512CDBW:       # %bb.0:
    347 ; AVX512CDBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    348 ; AVX512CDBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
    349 ; AVX512CDBW-NEXT:    vplzcntd %zmm1, %zmm1
    350 ; AVX512CDBW-NEXT:    vpmovdw %zmm1, %ymm1
    351 ; AVX512CDBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    352 ; AVX512CDBW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
    353 ; AVX512CDBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    354 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    355 ; AVX512CDBW-NEXT:    vpmovdw %zmm0, %ymm0
    356 ; AVX512CDBW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
    357 ; AVX512CDBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    358 ; AVX512CDBW-NEXT:    retq
    359 ;
    360 ; AVX512BW-LABEL: testv32i16:
    361 ; AVX512BW:       # %bb.0:
    362 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    363 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    364 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    365 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    366 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm4
    367 ; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm4, %k0
    368 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm5
    369 ; AVX512BW-NEXT:    vpandq %zmm5, %zmm2, %zmm2
    370 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm4, %zmm1
    371 ; AVX512BW-NEXT:    vpshufb %zmm1, %zmm3, %zmm1
    372 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
    373 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
    374 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    375 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    376 ; AVX512BW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
    377 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
    378 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
    379 ; AVX512BW-NEXT:    retq
    380 ;
    381 ; AVX512DQ-LABEL: testv32i16:
    382 ; AVX512DQ:       # %bb.0:
    383 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    384 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
    385 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    386 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    387 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm5
    388 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm5, %ymm5
    389 ; AVX512DQ-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    390 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm5, %ymm7
    391 ; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
    392 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
    393 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
    394 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm0, %ymm0
    395 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    396 ; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm0
    397 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
    398 ; AVX512DQ-NEXT:    vpaddw %ymm0, %ymm3, %ymm0
    399 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    400 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    401 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm5
    402 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm5, %ymm2
    403 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm2, %ymm5
    404 ; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
    405 ; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm4, %ymm2
    406 ; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
    407 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm1, %ymm1
    408 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    409 ; AVX512DQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
    410 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    411 ; AVX512DQ-NEXT:    vpaddw %ymm1, %ymm2, %ymm1
    412 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 0 is presumably the "zero input is undefined/poison" flag
        ; turned off, i.e. a zero lane has a defined result - confirm in the LangRef.
    413   %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
    414   ret <32 x i16> %out
    415 }
    416 
    417 define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
        ; Test: llvm.ctlz on <32 x i16> with the i1 flag argument set to -1.
        ; Codegen pinned by the checks below, per RUN-line prefix:
        ;   AVX512CD   - for each of ymm0 and ymm1: widen words to dwords (vpmovzxwd),
        ;                vplzcntd, narrow back (vpmovdw), then vpsubw by a vector of 16s.
        ;   AVX512CDBW - same widen/vplzcntd/narrow/subtract steps, with the two halves
        ;                taken from zmm0 (vextracti64x4) and rejoined (vinserti64x4).
        ;   AVX512BW   - vpshufb lookups into a [4,3,2,2,1,1,1,1,0,...] table on zmm,
        ;                combined using vptestnmb/vpmovm2b masks and word shifts.
        ;   AVX512DQ   - the same table lookups on ymm0 and ymm1 separately, using
        ;                vpcmpeqb against zero for the masks.
        ; The CHECK lines are autogenerated (see the NOTE at the top of the file);
        ; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
    418 ; AVX512CD-LABEL: testv32i16u:
    419 ; AVX512CD:       # %bb.0:
    420 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    421 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    422 ; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
    423 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    424 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
    425 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
    426 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    427 ; AVX512CD-NEXT:    vpmovdw %zmm1, %ymm1
    428 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
    429 ; AVX512CD-NEXT:    retq
    430 ;
    431 ; AVX512CDBW-LABEL: testv32i16u:
    432 ; AVX512CDBW:       # %bb.0:
    433 ; AVX512CDBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    434 ; AVX512CDBW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
    435 ; AVX512CDBW-NEXT:    vplzcntd %zmm1, %zmm1
    436 ; AVX512CDBW-NEXT:    vpmovdw %zmm1, %ymm1
    437 ; AVX512CDBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
    438 ; AVX512CDBW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
    439 ; AVX512CDBW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    440 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    441 ; AVX512CDBW-NEXT:    vpmovdw %zmm0, %ymm0
    442 ; AVX512CDBW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
    443 ; AVX512CDBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    444 ; AVX512CDBW-NEXT:    retq
    445 ;
    446 ; AVX512BW-LABEL: testv32i16u:
    447 ; AVX512BW:       # %bb.0:
    448 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    449 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    450 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    451 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    452 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm4
    453 ; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm4, %k0
    454 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm5
    455 ; AVX512BW-NEXT:    vpandq %zmm5, %zmm2, %zmm2
    456 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm4, %zmm1
    457 ; AVX512BW-NEXT:    vpshufb %zmm1, %zmm3, %zmm1
    458 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
    459 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
    460 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
    461 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
    462 ; AVX512BW-NEXT:    vpandq %zmm0, %zmm1, %zmm0
    463 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
    464 ; AVX512BW-NEXT:    vpaddw %zmm0, %zmm1, %zmm0
    465 ; AVX512BW-NEXT:    retq
    466 ;
    467 ; AVX512DQ-LABEL: testv32i16u:
    468 ; AVX512DQ:       # %bb.0:
    469 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    470 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
    471 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    472 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    473 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm5
    474 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm5, %ymm5
    475 ; AVX512DQ-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    476 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm5, %ymm7
    477 ; AVX512DQ-NEXT:    vpand %ymm7, %ymm3, %ymm3
    478 ; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm4, %ymm5
    479 ; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm3, %ymm3
    480 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm0, %ymm0
    481 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    482 ; AVX512DQ-NEXT:    vpand %ymm0, %ymm3, %ymm0
    483 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm3, %ymm3
    484 ; AVX512DQ-NEXT:    vpaddw %ymm0, %ymm3, %ymm0
    485 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    486 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    487 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm5
    488 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm5, %ymm2
    489 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm2, %ymm5
    490 ; AVX512DQ-NEXT:    vpand %ymm5, %ymm3, %ymm3
    491 ; AVX512DQ-NEXT:    vpshufb %ymm2, %ymm4, %ymm2
    492 ; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm3, %ymm2
    493 ; AVX512DQ-NEXT:    vpcmpeqb %ymm6, %ymm1, %ymm1
    494 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    495 ; AVX512DQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
    496 ; AVX512DQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    497 ; AVX512DQ-NEXT:    vpaddw %ymm1, %ymm2, %ymm1
    498 ; AVX512DQ-NEXT:    retq
        ; NOTE(review): i1 -1 is the all-ones i1 (true); presumably this marks a zero
        ; input as undefined/poison (the "u" in the name) - confirm in the LangRef.
    499   %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
    500   ret <32 x i16> %out
    501 }
    502 
        ; testv64i8: count leading zeros over <64 x i8> with the flag operand cleared
        ; (i1 0), so per the LLVM LangRef a zero byte must still produce a defined
        ; count (the element width, 8).
        ; Lowering recorded by the checks below, per llc configuration:
        ;  - AVX512CD prefix (CD without BW): the value is handled as two 256-bit halves
        ;    in ymm0/ymm1. Each 128-bit quarter is zero-extended from bytes to dwords
        ;    (vpmovzxbd), counted with vplzcntd, truncated back with vpmovdb, and then
        ;    24 is subtracted to remove the leading zeros introduced by the widening.
        ;  - AVX512CDBW prefix (CD with BW): the same per-quarter sequence, but the
        ;    value lives in zmm0 and is split/rejoined with vextracti64x4/vinserti64x4.
        ;  - AVX512BW and AVX512DQ prefixes (no CD): two vpshufb lookups into the table
        ;    [4,3,2,2,1,1,1,1,0,...], one per nibble; the low-nibble count is kept only
        ;    where the high nibble is zero, and the two counts are added.
    503 define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
    504 ; AVX512CD-LABEL: testv64i8:
    505 ; AVX512CD:       # %bb.0:
    506 ; AVX512CD-NEXT:    vextracti128 $1, %ymm0, %xmm2
    507 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    508 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    509 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    510 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    511 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    512 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    513 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    514 ; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
    515 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    516 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    517 ; AVX512CD-NEXT:    vextracti128 $1, %ymm1, %xmm2
    518 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    519 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    520 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    521 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    522 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    523 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    524 ; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
    525 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    526 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    527 ; AVX512CD-NEXT:    retq
    528 ;
    529 ; AVX512CDBW-LABEL: testv64i8:
    530 ; AVX512CDBW:       # %bb.0:
    531 ; AVX512CDBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    532 ; AVX512CDBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
    533 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    534 ; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
    535 ; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
    536 ; AVX512CDBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    537 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    538 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    539 ; AVX512CDBW-NEXT:    vplzcntd %zmm1, %zmm1
    540 ; AVX512CDBW-NEXT:    vpmovdb %zmm1, %xmm1
    541 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    542 ; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    543 ; AVX512CDBW-NEXT:    vextracti128 $1, %ymm0, %xmm2
    544 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    545 ; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
    546 ; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
    547 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    548 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    549 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    550 ; AVX512CDBW-NEXT:    vpmovdb %zmm0, %xmm0
    551 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    552 ; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    553 ; AVX512CDBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    554 ; AVX512CDBW-NEXT:    retq
    555 ;
    556 ; AVX512BW-LABEL: testv64i8:
    557 ; AVX512BW:       # %bb.0:
    558 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    559 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    560 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    561 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    562 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    563 ; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0
    564 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm4
    565 ; AVX512BW-NEXT:    vpandq %zmm4, %zmm2, %zmm2
    566 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    567 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    568 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
    569 ; AVX512BW-NEXT:    retq
    570 ;
    571 ; AVX512DQ-LABEL: testv64i8:
    572 ; AVX512DQ:       # %bb.0:
    573 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    574 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
    575 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    576 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    577 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
    578 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    579 ; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
    580 ; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm6
    581 ; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
    582 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    583 ; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
    584 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    585 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    586 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
    587 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    588 ; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm2
    589 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm3, %ymm2
    590 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    591 ; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    592 ; AVX512DQ-NEXT:    retq
    593   %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
    594   ret <64 x i8> %out
    595 }
    596 
        ; testv64i8u: count leading zeros over <64 x i8> with the flag operand set
        ; (i1 -1, i.e. true). Per the LLVM LangRef this means the result for a zero
        ; element is not required to be defined, so the backend may pick any lowering
        ; that is correct for non-zero bytes.
        ; The instruction sequences recorded below are the same as those recorded for
        ; @testv64i8 under every check prefix: widen to dwords + vplzcntd + subtract 24
        ; when CD is available, and a per-nibble vpshufb table lookup otherwise.
    597 define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
    598 ; AVX512CD-LABEL: testv64i8u:
    599 ; AVX512CD:       # %bb.0:
    600 ; AVX512CD-NEXT:    vextracti128 $1, %ymm0, %xmm2
    601 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    602 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    603 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    604 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    605 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    606 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    607 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    608 ; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
    609 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    610 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    611 ; AVX512CD-NEXT:    vextracti128 $1, %ymm1, %xmm2
    612 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    613 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    614 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    615 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    616 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    617 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    618 ; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
    619 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    620 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    621 ; AVX512CD-NEXT:    retq
    622 ;
    623 ; AVX512CDBW-LABEL: testv64i8u:
    624 ; AVX512CDBW:       # %bb.0:
    625 ; AVX512CDBW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    626 ; AVX512CDBW-NEXT:    vextracti128 $1, %ymm1, %xmm2
    627 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    628 ; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
    629 ; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
    630 ; AVX512CDBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    631 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    632 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    633 ; AVX512CDBW-NEXT:    vplzcntd %zmm1, %zmm1
    634 ; AVX512CDBW-NEXT:    vpmovdb %zmm1, %xmm1
    635 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    636 ; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    637 ; AVX512CDBW-NEXT:    vextracti128 $1, %ymm0, %xmm2
    638 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    639 ; AVX512CDBW-NEXT:    vplzcntd %zmm2, %zmm2
    640 ; AVX512CDBW-NEXT:    vpmovdb %zmm2, %xmm2
    641 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    642 ; AVX512CDBW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    643 ; AVX512CDBW-NEXT:    vplzcntd %zmm0, %zmm0
    644 ; AVX512CDBW-NEXT:    vpmovdb %zmm0, %xmm0
    645 ; AVX512CDBW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    646 ; AVX512CDBW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    647 ; AVX512CDBW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    648 ; AVX512CDBW-NEXT:    retq
    649 ;
    650 ; AVX512BW-LABEL: testv64i8u:
    651 ; AVX512BW:       # %bb.0:
    652 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    653 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
    654 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    655 ; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
    656 ; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
    657 ; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm0, %k0
    658 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm4
    659 ; AVX512BW-NEXT:    vpandq %zmm4, %zmm2, %zmm2
    660 ; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
    661 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
    662 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm2, %zmm0
    663 ; AVX512BW-NEXT:    retq
    664 ;
    665 ; AVX512DQ-LABEL: testv64i8u:
    666 ; AVX512DQ:       # %bb.0:
    667 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    668 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
    669 ; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    670 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    671 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
    672 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
    673 ; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
    674 ; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm6
    675 ; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
    676 ; AVX512DQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
    677 ; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
    678 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
    679 ; AVX512DQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
    680 ; AVX512DQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
    681 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
    682 ; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm2
    683 ; AVX512DQ-NEXT:    vpand %ymm2, %ymm3, %ymm2
    684 ; AVX512DQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
    685 ; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    686 ; AVX512DQ-NEXT:    retq
    687   %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
    688   ret <64 x i8> %out
    689 }
    690 
        ; Intrinsic declarations: one llvm.ctlz overload per 512-bit vector type
        ; (8 x i64, 16 x i32, 32 x i16, 64 x i8). Each takes the vector operand and an
        ; i1 constant flag; per the LLVM LangRef the flag states whether a zero input
        ; element may produce an undefined result.
    691 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
    692 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
    693 declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
    694 declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
    695