      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX --check-prefix=AVX2
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512VL
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=AVX512VLBWDQ
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512VLCD
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512cd | FileCheck %s --check-prefix=X64 --check-prefix=NOBW --check-prefix=AVX512 --check-prefix=AVX512CD
      8 ;
      9 ; Just one 32-bit run to make sure we do reasonable things for i64 lzcnt.
     10 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX
     11 
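; Without AVX512CD, ctlz is expanded with a per-nibble lookup: vpshufb indexes
; the [4,3,2,2,1,1,1,1,0,...] table to get the leading-zero count of each 4-bit
; half, the low-nibble count is added only when the high nibble is zero, and
; the per-byte counts are then widened to the element size by repeatedly adding
; the low-half count (masked by "high half == 0") to the shifted high-half
; count. AVX512CD targets lower the intrinsic directly to vplzcntd/vplzcntq.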
     12 define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
     13 ; AVX1-LABEL: testv4i64:
     14 ; AVX1:       # %bb.0:
     15 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
     16 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     17 ; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
     18 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
     19 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
     20 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
     21 ; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm6
     22 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
     23 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm6, %xmm7
     24 ; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
     25 ; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm6
     26 ; AVX1-NEXT:    vpaddb %xmm6, %xmm5, %xmm5
     27 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm6
     28 ; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
     29 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
     30 ; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
     31 ; AVX1-NEXT:    vpaddw %xmm6, %xmm5, %xmm5
     32 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm6
     33 ; AVX1-NEXT:    vpsrld $16, %xmm6, %xmm6
     34 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
     35 ; AVX1-NEXT:    vpsrld $16, %xmm5, %xmm5
     36 ; AVX1-NEXT:    vpaddd %xmm6, %xmm5, %xmm5
     37 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm2
     38 ; AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
     39 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
     40 ; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
     41 ; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
     42 ; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
     43 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
     44 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
     45 ; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
     46 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm3, %xmm6
     47 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
     48 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
     49 ; AVX1-NEXT:    vpaddb %xmm3, %xmm5, %xmm3
     50 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm4
     51 ; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
     52 ; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
     53 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
     54 ; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
     55 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm4
     56 ; AVX1-NEXT:    vpsrld $16, %xmm4, %xmm4
     57 ; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
     58 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
     59 ; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
     60 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
     61 ; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
     62 ; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
     63 ; AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm1
     64 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
     65 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
     66 ; AVX1-NEXT:    retq
     67 ;
     68 ; AVX2-LABEL: testv4i64:
     69 ; AVX2:       # %bb.0:
     70 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
     71 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
     72 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
     73 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
     74 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
     75 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
     76 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
     77 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
     78 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
     79 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
     80 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
     81 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
     82 ; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
     83 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
     84 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
     85 ; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
     86 ; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
     87 ; AVX2-NEXT:    vpsrld $16, %ymm2, %ymm2
     88 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
     89 ; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
     90 ; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
     91 ; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
     92 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
     93 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
     94 ; AVX2-NEXT:    vpsrlq $32, %ymm1, %ymm1
     95 ; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
     96 ; AVX2-NEXT:    retq
     97 ;
     98 ; AVX512VL-LABEL: testv4i64:
     99 ; AVX512VL:       # %bb.0:
    100 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    101 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    102 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    103 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    104 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    105 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    106 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    107 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    108 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    109 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    110 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    111 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    112 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
    113 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    114 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    115 ; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    116 ; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    117 ; AVX512VL-NEXT:    vpsrld $16, %ymm2, %ymm2
    118 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    119 ; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
    120 ; AVX512VL-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    121 ; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    122 ; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
    123 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    124 ; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
    125 ; AVX512VL-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    126 ; AVX512VL-NEXT:    retq
    127 ;
    128 ; AVX512VLBWDQ-LABEL: testv4i64:
    129 ; AVX512VLBWDQ:       # %bb.0:
    130 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    131 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    132 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    133 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    134 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    135 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    136 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    137 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    138 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    139 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    140 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    141 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    142 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    143 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    144 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    145 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    146 ; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    147 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm2, %ymm2
    148 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    149 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
    150 ; AVX512VLBWDQ-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    151 ; AVX512VLBWDQ-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    152 ; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm0, %ymm0
    153 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    154 ; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
    155 ; AVX512VLBWDQ-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    156 ; AVX512VLBWDQ-NEXT:    retq
    157 ;
    158 ; AVX512VLCD-LABEL: testv4i64:
    159 ; AVX512VLCD:       # %bb.0:
    160 ; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
    161 ; AVX512VLCD-NEXT:    retq
    162 ;
    163 ; AVX512CD-LABEL: testv4i64:
    164 ; AVX512CD:       # %bb.0:
    165 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    166 ; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
    167 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    168 ; AVX512CD-NEXT:    retq
    169 ;
    170 ; X32-AVX-LABEL: testv4i64:
    171 ; X32-AVX:       # %bb.0:
    172 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    173 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    174 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    175 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    176 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    177 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    178 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    179 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    180 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    181 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    182 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    183 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    184 ; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
    185 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    186 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    187 ; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    188 ; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    189 ; X32-AVX-NEXT:    vpsrld $16, %ymm2, %ymm2
    190 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    191 ; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
    192 ; X32-AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    193 ; X32-AVX-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    194 ; X32-AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
    195 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    196 ; X32-AVX-NEXT:    vpsrlq $32, %ymm1, %ymm1
    197 ; X32-AVX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    198 ; X32-AVX-NEXT:    retl
    199 
    200   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
    201   ret <4 x i64> %out
    202 }
    203 
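; The *u variants pass i1 -1 (the result is undef for a zero input); on all of
; these targets the generated code matches the zero-defined variants above.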
    204 define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
    205 ; AVX1-LABEL: testv4i64u:
    206 ; AVX1:       # %bb.0:
    207 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    208 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    209 ; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm1
    210 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    211 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm5
    212 ; AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm1
    213 ; AVX1-NEXT:    vpand %xmm3, %xmm1, %xmm6
    214 ; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    215 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm6, %xmm7
    216 ; AVX1-NEXT:    vpand %xmm7, %xmm5, %xmm5
    217 ; AVX1-NEXT:    vpshufb %xmm6, %xmm4, %xmm6
    218 ; AVX1-NEXT:    vpaddb %xmm6, %xmm5, %xmm5
    219 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm6
    220 ; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
    221 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
    222 ; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
    223 ; AVX1-NEXT:    vpaddw %xmm6, %xmm5, %xmm5
    224 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm2, %xmm6
    225 ; AVX1-NEXT:    vpsrld $16, %xmm6, %xmm6
    226 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm6
    227 ; AVX1-NEXT:    vpsrld $16, %xmm5, %xmm5
    228 ; AVX1-NEXT:    vpaddd %xmm6, %xmm5, %xmm5
    229 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm2, %xmm2
    230 ; AVX1-NEXT:    vpsrlq $32, %xmm2, %xmm2
    231 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
    232 ; AVX1-NEXT:    vpsrlq $32, %xmm5, %xmm5
    233 ; AVX1-NEXT:    vpaddq %xmm2, %xmm5, %xmm2
    234 ; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm5
    235 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
    236 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm6
    237 ; AVX1-NEXT:    vpand %xmm3, %xmm6, %xmm3
    238 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm3, %xmm6
    239 ; AVX1-NEXT:    vpand %xmm6, %xmm5, %xmm5
    240 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    241 ; AVX1-NEXT:    vpaddb %xmm3, %xmm5, %xmm3
    242 ; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm4
    243 ; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
    244 ; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
    245 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    246 ; AVX1-NEXT:    vpaddw %xmm4, %xmm3, %xmm3
    247 ; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm4
    248 ; AVX1-NEXT:    vpsrld $16, %xmm4, %xmm4
    249 ; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm4
    250 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
    251 ; AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
    252 ; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
    253 ; AVX1-NEXT:    vpsrlq $32, %xmm0, %xmm0
    254 ; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
    255 ; AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm1
    256 ; AVX1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    257 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    258 ; AVX1-NEXT:    retq
    259 ;
    260 ; AVX2-LABEL: testv4i64u:
    261 ; AVX2:       # %bb.0:
    262 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    263 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
    264 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    265 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    266 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
    267 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
    268 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    269 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    270 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
    271 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    272 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    273 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    274 ; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
    275 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
    276 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
    277 ; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    278 ; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    279 ; AVX2-NEXT:    vpsrld $16, %ymm2, %ymm2
    280 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
    281 ; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
    282 ; AVX2-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    283 ; AVX2-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    284 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
    285 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
    286 ; AVX2-NEXT:    vpsrlq $32, %ymm1, %ymm1
    287 ; AVX2-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    288 ; AVX2-NEXT:    retq
    289 ;
    290 ; AVX512VL-LABEL: testv4i64u:
    291 ; AVX512VL:       # %bb.0:
    292 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    293 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    294 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    295 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    296 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    297 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    298 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    299 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    300 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    301 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    302 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    303 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    304 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
    305 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    306 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    307 ; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    308 ; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    309 ; AVX512VL-NEXT:    vpsrld $16, %ymm2, %ymm2
    310 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    311 ; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
    312 ; AVX512VL-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    313 ; AVX512VL-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    314 ; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
    315 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    316 ; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm1
    317 ; AVX512VL-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    318 ; AVX512VL-NEXT:    retq
    319 ;
    320 ; AVX512VLBWDQ-LABEL: testv4i64u:
    321 ; AVX512VLBWDQ:       # %bb.0:
    322 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    323 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    324 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    325 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    326 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    327 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    328 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    329 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    330 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    331 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    332 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    333 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    334 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    335 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    336 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    337 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    338 ; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    339 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm2, %ymm2
    340 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    341 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
    342 ; AVX512VLBWDQ-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    343 ; AVX512VLBWDQ-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    344 ; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm0, %ymm0
    345 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    346 ; AVX512VLBWDQ-NEXT:    vpsrlq $32, %ymm1, %ymm1
    347 ; AVX512VLBWDQ-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    348 ; AVX512VLBWDQ-NEXT:    retq
    349 ;
    350 ; AVX512VLCD-LABEL: testv4i64u:
    351 ; AVX512VLCD:       # %bb.0:
    352 ; AVX512VLCD-NEXT:    vplzcntq %ymm0, %ymm0
    353 ; AVX512VLCD-NEXT:    retq
    354 ;
    355 ; AVX512CD-LABEL: testv4i64u:
    356 ; AVX512CD:       # %bb.0:
    357 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    358 ; AVX512CD-NEXT:    vplzcntq %zmm0, %zmm0
    359 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    360 ; AVX512CD-NEXT:    retq
    361 ;
    362 ; X32-AVX-LABEL: testv4i64u:
    363 ; X32-AVX:       # %bb.0:
    364 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    365 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    366 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    367 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    368 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    369 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    370 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    371 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    372 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    373 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    374 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    375 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    376 ; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
    377 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    378 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    379 ; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    380 ; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm2
    381 ; X32-AVX-NEXT:    vpsrld $16, %ymm2, %ymm2
    382 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    383 ; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
    384 ; X32-AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
    385 ; X32-AVX-NEXT:    vpcmpeqd %ymm4, %ymm0, %ymm0
    386 ; X32-AVX-NEXT:    vpsrlq $32, %ymm0, %ymm0
    387 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    388 ; X32-AVX-NEXT:    vpsrlq $32, %ymm1, %ymm1
    389 ; X32-AVX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    390 ; X32-AVX-NEXT:    retl
    391 
    392   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
    393   ret <4 x i64> %out
    394 }
    395 
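; i32 elements use the same nibble-lookup expansion with two merge steps
; (bytes to words, words to dwords); AVX512CD targets use vplzcntd directly.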
    396 define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
    397 ; AVX1-LABEL: testv8i32:
    398 ; AVX1:       # %bb.0:
    399 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    400 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    401 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
    402 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    403 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    404 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
    405 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
    406 ; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    407 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
    408 ; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
    409 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
    410 ; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
    411 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm5
    412 ; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
    413 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm5
    414 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    415 ; AVX1-NEXT:    vpaddw %xmm5, %xmm3, %xmm3
    416 ; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm1, %xmm1
    417 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
    418 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
    419 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
    420 ; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
    421 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
    422 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    423 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
    424 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
    425 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
    426 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
    427 ; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
    428 ; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
    429 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm3
    430 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    431 ; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm3
    432 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
    433 ; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
    434 ; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm0, %xmm0
    435 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
    436 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
    437 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
    438 ; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
    439 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    440 ; AVX1-NEXT:    retq
    441 ;
    442 ; AVX2-LABEL: testv8i32:
    443 ; AVX2:       # %bb.0:
    444 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    445 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
    446 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    447 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    448 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
    449 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
    450 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    451 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    452 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
    453 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    454 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    455 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    456 ; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
    457 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
    458 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
    459 ; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    460 ; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    461 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
    462 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
    463 ; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
    464 ; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    465 ; AVX2-NEXT:    retq
    466 ;
    467 ; AVX512VL-LABEL: testv8i32:
    468 ; AVX512VL:       # %bb.0:
    469 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    470 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    471 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    472 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    473 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    474 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    475 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    476 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    477 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    478 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    479 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    480 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    481 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
    482 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    483 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    484 ; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    485 ; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    486 ; AVX512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
    487 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    488 ; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
    489 ; AVX512VL-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    490 ; AVX512VL-NEXT:    retq
    491 ;
    492 ; AVX512VLBWDQ-LABEL: testv8i32:
    493 ; AVX512VLBWDQ:       # %bb.0:
    494 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    495 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    496 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    497 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    498 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    499 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    500 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    501 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    502 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    503 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    504 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    505 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    506 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    507 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    508 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    509 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    510 ; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    511 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm0, %ymm0
    512 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    513 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
    514 ; AVX512VLBWDQ-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    515 ; AVX512VLBWDQ-NEXT:    retq
    516 ;
    517 ; AVX512VLCD-LABEL: testv8i32:
    518 ; AVX512VLCD:       # %bb.0:
    519 ; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
    520 ; AVX512VLCD-NEXT:    retq
    521 ;
    522 ; AVX512CD-LABEL: testv8i32:
    523 ; AVX512CD:       # %bb.0:
    524 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    525 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    526 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    527 ; AVX512CD-NEXT:    retq
    528 ;
    529 ; X32-AVX-LABEL: testv8i32:
    530 ; X32-AVX:       # %bb.0:
    531 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    532 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    533 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    534 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    535 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    536 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    537 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    538 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    539 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    540 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    541 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    542 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    543 ; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
    544 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    545 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    546 ; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    547 ; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    548 ; X32-AVX-NEXT:    vpsrld $16, %ymm0, %ymm0
    549 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    550 ; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
    551 ; X32-AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    552 ; X32-AVX-NEXT:    retl
    553 
    554   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
    555   ret <8 x i32> %out
    556 }
    557 
    558 define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
    559 ; AVX1-LABEL: testv8i32u:
    560 ; AVX1:       # %bb.0:
    561 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    562 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    563 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
    564 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    565 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    566 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
    567 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
    568 ; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    569 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
    570 ; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
    571 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
    572 ; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
    573 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm5
    574 ; AVX1-NEXT:    vpsrlw $8, %xmm5, %xmm5
    575 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm5
    576 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    577 ; AVX1-NEXT:    vpaddw %xmm5, %xmm3, %xmm3
    578 ; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm1, %xmm1
    579 ; AVX1-NEXT:    vpsrld $16, %xmm1, %xmm1
    580 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
    581 ; AVX1-NEXT:    vpsrld $16, %xmm3, %xmm3
    582 ; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
    583 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
    584 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    585 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
    586 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
    587 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
    588 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
    589 ; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
    590 ; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
    591 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm3
    592 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    593 ; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm3
    594 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
    595 ; AVX1-NEXT:    vpaddw %xmm3, %xmm2, %xmm2
    596 ; AVX1-NEXT:    vpcmpeqw %xmm6, %xmm0, %xmm0
    597 ; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm0
    598 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
    599 ; AVX1-NEXT:    vpsrld $16, %xmm2, %xmm2
    600 ; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
    601 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    602 ; AVX1-NEXT:    retq
    603 ;
    604 ; AVX2-LABEL: testv8i32u:
    605 ; AVX2:       # %bb.0:
    606 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    607 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
    608 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    609 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    610 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
    611 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
    612 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    613 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    614 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
    615 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    616 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    617 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    618 ; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
    619 ; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm2
    620 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
    621 ; AVX2-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    622 ; AVX2-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    623 ; AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
    624 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
    625 ; AVX2-NEXT:    vpsrld $16, %ymm1, %ymm1
    626 ; AVX2-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    627 ; AVX2-NEXT:    retq
    628 ;
    629 ; AVX512VL-LABEL: testv8i32u:
    630 ; AVX512VL:       # %bb.0:
    631 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    632 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    633 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    634 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    635 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    636 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    637 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    638 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    639 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    640 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    641 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    642 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    643 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
    644 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm2
    645 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    646 ; AVX512VL-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    647 ; AVX512VL-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    648 ; AVX512VL-NEXT:    vpsrld $16, %ymm0, %ymm0
    649 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    650 ; AVX512VL-NEXT:    vpsrld $16, %ymm1, %ymm1
    651 ; AVX512VL-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    652 ; AVX512VL-NEXT:    retq
    653 ;
    654 ; AVX512VLBWDQ-LABEL: testv8i32u:
    655 ; AVX512VLBWDQ:       # %bb.0:
    656 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    657 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    658 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    659 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    660 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    661 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    662 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    663 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    664 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    665 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    666 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    667 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    668 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm2, %ymm2
    669 ; AVX512VLBWDQ-NEXT:    vpand %ymm2, %ymm1, %ymm2
    670 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    671 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    672 ; AVX512VLBWDQ-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    673 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm0, %ymm0
    674 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    675 ; AVX512VLBWDQ-NEXT:    vpsrld $16, %ymm1, %ymm1
    676 ; AVX512VLBWDQ-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    677 ; AVX512VLBWDQ-NEXT:    retq
    678 ;
    679 ; AVX512VLCD-LABEL: testv8i32u:
    680 ; AVX512VLCD:       # %bb.0:
    681 ; AVX512VLCD-NEXT:    vplzcntd %ymm0, %ymm0
    682 ; AVX512VLCD-NEXT:    retq
    683 ;
    684 ; AVX512CD-LABEL: testv8i32u:
    685 ; AVX512CD:       # %bb.0:
    686 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    687 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    688 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
    689 ; AVX512CD-NEXT:    retq
    690 ;
    691 ; X32-AVX-LABEL: testv8i32u:
    692 ; X32-AVX:       # %bb.0:
    693 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    694 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    695 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    696 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    697 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    698 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    699 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    700 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    701 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    702 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    703 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    704 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm2
    705 ; X32-AVX-NEXT:    vpsrlw $8, %ymm2, %ymm2
    706 ; X32-AVX-NEXT:    vpand %ymm2, %ymm1, %ymm2
    707 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    708 ; X32-AVX-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
    709 ; X32-AVX-NEXT:    vpcmpeqw %ymm4, %ymm0, %ymm0
    710 ; X32-AVX-NEXT:    vpsrld $16, %ymm0, %ymm0
    711 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    712 ; X32-AVX-NEXT:    vpsrld $16, %ymm1, %ymm1
    713 ; X32-AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    714 ; X32-AVX-NEXT:    retl
    715 
    716   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)
    717   ret <8 x i32> %out
    718 }
    719 
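; i16 elements need a single merge step (bytes to words). The AVX512CD targets
; zero-extend to i32, use vplzcntd, truncate back to i16 and subtract the
; width difference (16).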
    720 define <16 x i16> @testv16i16(<16 x i16> %in) nounwind {
    721 ; AVX1-LABEL: testv16i16:
    722 ; AVX1:       # %bb.0:
    723 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    724 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    725 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
    726 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    727 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    728 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
    729 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
    730 ; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    731 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
    732 ; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
    733 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
    734 ; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
    735 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
    736 ; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
    737 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
    738 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    739 ; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
    740 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
    741 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    742 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
    743 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
    744 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
    745 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
    746 ; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
    747 ; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
    748 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
    749 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
    750 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
    751 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
    752 ; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
    753 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    754 ; AVX1-NEXT:    retq
    755 ;
    756 ; AVX2-LABEL: testv16i16:
    757 ; AVX2:       # %bb.0:
    758 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    759 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
    760 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    761 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    762 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
    763 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
    764 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    765 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    766 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
    767 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    768 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    769 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    770 ; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
    771 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
    772 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
    773 ; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    774 ; AVX2-NEXT:    retq
    775 ;
    776 ; AVX512VL-LABEL: testv16i16:
    777 ; AVX512VL:       # %bb.0:
    778 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    779 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    780 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    781 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    782 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    783 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    784 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    785 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    786 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    787 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    788 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    789 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    790 ; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
    791 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    792 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    793 ; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    794 ; AVX512VL-NEXT:    retq
    795 ;
    796 ; AVX512VLBWDQ-LABEL: testv16i16:
    797 ; AVX512VLBWDQ:       # %bb.0:
    798 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    799 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    800 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    801 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    802 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    803 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    804 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    805 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    806 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    807 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    808 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    809 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    810 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    811 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    812 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    813 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    814 ; AVX512VLBWDQ-NEXT:    retq
    815 ;
    816 ; AVX512-LABEL: testv16i16:
    817 ; AVX512:       # %bb.0:
    818 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    819 ; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
    820 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
    821 ; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
    822 ; AVX512-NEXT:    retq
    823 ;
    824 ; X32-AVX-LABEL: testv16i16:
    825 ; X32-AVX:       # %bb.0:
    826 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    827 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    828 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    829 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    830 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    831 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    832 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    833 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    834 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    835 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    836 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    837 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    838 ; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
    839 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    840 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    841 ; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    842 ; X32-AVX-NEXT:    retl
    843   %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
    844   ret <16 x i16> %out
    845 }
    846 
    847 define <16 x i16> @testv16i16u(<16 x i16> %in) nounwind {
    848 ; AVX1-LABEL: testv16i16u:
    849 ; AVX1:       # %bb.0:
    850 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    851 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    852 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
    853 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    854 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    855 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm5
    856 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm5
    857 ; AVX1-NEXT:    vpxor %xmm6, %xmm6, %xmm6
    858 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm5, %xmm7
    859 ; AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
    860 ; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm5
    861 ; AVX1-NEXT:    vpaddb %xmm5, %xmm3, %xmm3
    862 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm1, %xmm1
    863 ; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
    864 ; AVX1-NEXT:    vpand %xmm1, %xmm3, %xmm1
    865 ; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
    866 ; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
    867 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
    868 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    869 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm5
    870 ; AVX1-NEXT:    vpand %xmm2, %xmm5, %xmm2
    871 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm2, %xmm5
    872 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
    873 ; AVX1-NEXT:    vpshufb %xmm2, %xmm4, %xmm2
    874 ; AVX1-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
    875 ; AVX1-NEXT:    vpcmpeqb %xmm6, %xmm0, %xmm0
    876 ; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
    877 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
    878 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
    879 ; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
    880 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    881 ; AVX1-NEXT:    retq
    882 ;
    883 ; AVX2-LABEL: testv16i16u:
    884 ; AVX2:       # %bb.0:
    885 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    886 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
    887 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    888 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    889 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm4
    890 ; AVX2-NEXT:    vpand %ymm1, %ymm4, %ymm1
    891 ; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    892 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    893 ; AVX2-NEXT:    vpand %ymm5, %ymm2, %ymm2
    894 ; AVX2-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    895 ; AVX2-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    896 ; AVX2-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    897 ; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
    898 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
    899 ; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
    900 ; AVX2-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    901 ; AVX2-NEXT:    retq
    902 ;
    903 ; AVX512VL-LABEL: testv16i16u:
    904 ; AVX512VL:       # %bb.0:
    905 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    906 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
    907 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    908 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    909 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
    910 ; AVX512VL-NEXT:    vpand %ymm1, %ymm4, %ymm1
    911 ; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    912 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    913 ; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
    914 ; AVX512VL-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    915 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    916 ; AVX512VL-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    917 ; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
    918 ; AVX512VL-NEXT:    vpand %ymm0, %ymm1, %ymm0
    919 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
    920 ; AVX512VL-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    921 ; AVX512VL-NEXT:    retq
    922 ;
    923 ; AVX512VLBWDQ-LABEL: testv16i16u:
    924 ; AVX512VLBWDQ:       # %bb.0:
    925 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    926 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
    927 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    928 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    929 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm4
    930 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm4, %ymm1
    931 ; AVX512VLBWDQ-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    932 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    933 ; AVX512VLBWDQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
    934 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    935 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    936 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    937 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
    938 ; AVX512VLBWDQ-NEXT:    vpand %ymm0, %ymm1, %ymm0
    939 ; AVX512VLBWDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
    940 ; AVX512VLBWDQ-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    941 ; AVX512VLBWDQ-NEXT:    retq
    942 ;
    943 ; AVX512-LABEL: testv16i16u:
    944 ; AVX512:       # %bb.0:
    945 ; AVX512-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
    946 ; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
    947 ; AVX512-NEXT:    vpmovdw %zmm0, %ymm0
    948 ; AVX512-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
    949 ; AVX512-NEXT:    retq
    950 ;
    951 ; X32-AVX-LABEL: testv16i16u:
    952 ; X32-AVX:       # %bb.0:
    953 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    954 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
    955 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    956 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
    957 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm4
    958 ; X32-AVX-NEXT:    vpand %ymm1, %ymm4, %ymm1
    959 ; X32-AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
    960 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm1, %ymm5
    961 ; X32-AVX-NEXT:    vpand %ymm5, %ymm2, %ymm2
    962 ; X32-AVX-NEXT:    vpshufb %ymm1, %ymm3, %ymm1
    963 ; X32-AVX-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
    964 ; X32-AVX-NEXT:    vpcmpeqb %ymm4, %ymm0, %ymm0
    965 ; X32-AVX-NEXT:    vpsrlw $8, %ymm0, %ymm0
    966 ; X32-AVX-NEXT:    vpand %ymm0, %ymm1, %ymm0
    967 ; X32-AVX-NEXT:    vpsrlw $8, %ymm1, %ymm1
    968 ; X32-AVX-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
    969 ; X32-AVX-NEXT:    retl
    970   %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
    971   ret <16 x i16> %out
    972 }
    973 
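; For i8 elements the nibble lookup already produces the per-byte count, so no
; merge steps are needed. The AVX512CD targets zero-extend each 128-bit half to
; i32, use vplzcntd, truncate and subtract 24.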
    974 define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
    975 ; AVX1-LABEL: testv32i8:
    976 ; AVX1:       # %bb.0:
    977 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    978 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
    979 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
    980 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
    981 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    982 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
    983 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
    984 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
    985 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
    986 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
    987 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
    988 ; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
    989 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
    990 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
    991 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
    992 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
    993 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
    994 ; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
    995 ; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
    996 ; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
    997 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    998 ; AVX1-NEXT:    retq
    999 ;
   1000 ; AVX2-LABEL: testv32i8:
   1001 ; AVX2:       # %bb.0:
   1002 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1003 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1004 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1005 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1006 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1007 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1008 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1009 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1010 ; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1011 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1012 ; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1013 ; AVX2-NEXT:    retq
   1014 ;
   1015 ; AVX512VL-LABEL: testv32i8:
   1016 ; AVX512VL:       # %bb.0:
   1017 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1018 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1019 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1020 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1021 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1022 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1023 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1024 ; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1025 ; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1026 ; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1027 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1028 ; AVX512VL-NEXT:    retq
   1029 ;
   1030 ; AVX512VLBWDQ-LABEL: testv32i8:
   1031 ; AVX512VLBWDQ:       # %bb.0:
   1032 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1033 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1034 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1035 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1036 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1037 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1038 ; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1039 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1040 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1041 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1042 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1043 ; AVX512VLBWDQ-NEXT:    retq
   1044 ;
   1045 ; AVX512-LABEL: testv32i8:
   1046 ; AVX512:       # %bb.0:
   1047 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1048 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
   1049 ; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
   1050 ; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
   1051 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
   1052 ; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
   1053 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   1054 ; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
   1055 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
   1056 ; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
   1057 ; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1058 ; AVX512-NEXT:    retq
   1059 ;
   1060 ; X32-AVX-LABEL: testv32i8:
   1061 ; X32-AVX:       # %bb.0:
   1062 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1063 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1064 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1065 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1066 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1067 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1068 ; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1069 ; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1070 ; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1071 ; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1072 ; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1073 ; X32-AVX-NEXT:    retl
   1074   %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
   1075   ret <32 x i8> %out
   1076 }
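; Note on the checks above: the runs without avx512cd build the per-byte lzcnt
; from a vpshufb nibble lookup. The table [4,3,2,2,1,1,1,1,0,...] holds ctlz of
; each 4-bit value; the result is LUT[hi] when the high nibble is nonzero,
; otherwise 4 + LUT[lo] (the vpcmpeqb/vpand pair masks the low-nibble count in
; only when the high nibble is zero). The avx512cd runs (AVX512 prefix) instead
; zero-extend each byte to a dword, use vplzcntd, truncate back with vpmovdb,
; and subtract 24 to undo the widening.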
   1077 
   1078 define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
   1079 ; AVX1-LABEL: testv32i8u:
   1080 ; AVX1:       # %bb.0:
   1081 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   1082 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1083 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
   1084 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1085 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
   1086 ; AVX1-NEXT:    vpsrlw $4, %xmm1, %xmm1
   1087 ; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
   1088 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
   1089 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm1, %xmm6
   1090 ; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
   1091 ; AVX1-NEXT:    vpshufb %xmm1, %xmm4, %xmm1
   1092 ; AVX1-NEXT:    vpaddb %xmm1, %xmm3, %xmm1
   1093 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm3
   1094 ; AVX1-NEXT:    vpshufb %xmm3, %xmm4, %xmm3
   1095 ; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
   1096 ; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
   1097 ; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm0, %xmm2
   1098 ; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
   1099 ; AVX1-NEXT:    vpshufb %xmm0, %xmm4, %xmm0
   1100 ; AVX1-NEXT:    vpaddb %xmm0, %xmm2, %xmm0
   1101 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1102 ; AVX1-NEXT:    retq
   1103 ;
   1104 ; AVX2-LABEL: testv32i8u:
   1105 ; AVX2:       # %bb.0:
   1106 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1107 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1108 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1109 ; AVX2-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1110 ; AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1111 ; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1112 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1113 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1114 ; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1115 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1116 ; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1117 ; AVX2-NEXT:    retq
   1118 ;
   1119 ; AVX512VL-LABEL: testv32i8u:
   1120 ; AVX512VL:       # %bb.0:
   1121 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1122 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1123 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1124 ; AVX512VL-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1125 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1126 ; AVX512VL-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1127 ; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1128 ; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1129 ; AVX512VL-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1130 ; AVX512VL-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1131 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1132 ; AVX512VL-NEXT:    retq
   1133 ;
   1134 ; AVX512VLBWDQ-LABEL: testv32i8u:
   1135 ; AVX512VLBWDQ:       # %bb.0:
   1136 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1137 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1138 ; AVX512VLBWDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1139 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1140 ; AVX512VLBWDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1141 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1142 ; AVX512VLBWDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1143 ; AVX512VLBWDQ-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1144 ; AVX512VLBWDQ-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1145 ; AVX512VLBWDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1146 ; AVX512VLBWDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1147 ; AVX512VLBWDQ-NEXT:    retq
   1148 ;
   1149 ; AVX512-LABEL: testv32i8u:
   1150 ; AVX512:       # %bb.0:
   1151 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
   1152 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
   1153 ; AVX512-NEXT:    vplzcntd %zmm1, %zmm1
   1154 ; AVX512-NEXT:    vpmovdb %zmm1, %xmm1
   1155 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm2 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
   1156 ; AVX512-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
   1157 ; AVX512-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   1158 ; AVX512-NEXT:    vplzcntd %zmm0, %zmm0
   1159 ; AVX512-NEXT:    vpmovdb %zmm0, %xmm0
   1160 ; AVX512-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
   1161 ; AVX512-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1162 ; AVX512-NEXT:    retq
   1163 ;
   1164 ; X32-AVX-LABEL: testv32i8u:
   1165 ; X32-AVX:       # %bb.0:
   1166 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
   1167 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm2
   1168 ; X32-AVX-NEXT:    vmovdqa {{.*#+}} ymm3 = [4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0,4,3,2,2,1,1,1,1,0,0,0,0,0,0,0,0]
   1169 ; X32-AVX-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
   1170 ; X32-AVX-NEXT:    vpsrlw $4, %ymm0, %ymm0
   1171 ; X32-AVX-NEXT:    vpand %ymm1, %ymm0, %ymm0
   1172 ; X32-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
   1173 ; X32-AVX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm1
   1174 ; X32-AVX-NEXT:    vpand %ymm1, %ymm2, %ymm1
   1175 ; X32-AVX-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
   1176 ; X32-AVX-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
   1177 ; X32-AVX-NEXT:    retl
   1178   %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
   1179   ret <32 x i8> %out
   1180 }
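; testv32i8u passes true (i1 -1) for ctlz's second operand, allowing an
; undefined result when the input is zero. The checks match testv32i8: both the
; nibble-LUT and the vplzcntd lowerings already return the full width (8) for a
; zero byte, so no shortcut is taken here.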
   1181 
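; The foldv* tests below check that ctlz of constant operands is folded at
; compile time into a constant-pool load (vmovaps). On the i686 run, <4 x i64>
; constants are printed as eight i32 elements (low/high dword of each i64),
; which is why X32-AVX shows [55,0,0,0,64,0,56,0] where X64 shows [55,0,64,56].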
   1182 define <4 x i64> @foldv4i64() nounwind {
   1183 ; X64-LABEL: foldv4i64:
   1184 ; X64:       # %bb.0:
   1185 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
   1186 ; X64-NEXT:    retq
   1187 ;
   1188 ; X32-AVX-LABEL: foldv4i64:
   1189 ; X32-AVX:       # %bb.0:
   1190 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
   1191 ; X32-AVX-NEXT:    retl
   1192   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
   1193   ret <4 x i64> %out
   1194 }
   1195 
   1196 define <4 x i64> @foldv4i64u() nounwind {
   1197 ; X64-LABEL: foldv4i64u:
   1198 ; X64:       # %bb.0:
   1199 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,64,56]
   1200 ; X64-NEXT:    retq
   1201 ;
   1202 ; X32-AVX-LABEL: foldv4i64u:
   1203 ; X32-AVX:       # %bb.0:
   1204 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [55,0,0,0,64,0,56,0]
   1205 ; X32-AVX-NEXT:    retl
   1206   %out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
   1207   ret <4 x i64> %out
   1208 }
   1209 
   1210 define <8 x i32> @foldv8i32() nounwind {
   1211 ; X64-LABEL: foldv8i32:
   1212 ; X64:       # %bb.0:
   1213 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
   1214 ; X64-NEXT:    retq
   1215 ;
   1216 ; X32-AVX-LABEL: foldv8i32:
   1217 ; X32-AVX:       # %bb.0:
   1218 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
   1219 ; X32-AVX-NEXT:    retl
   1220   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
   1221   ret <8 x i32> %out
   1222 }
   1223 
   1224 define <8 x i32> @foldv8i32u() nounwind {
   1225 ; X64-LABEL: foldv8i32u:
   1226 ; X64:       # %bb.0:
   1227 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
   1228 ; X64-NEXT:    retq
   1229 ;
   1230 ; X32-AVX-LABEL: foldv8i32u:
   1231 ; X32-AVX:       # %bb.0:
   1232 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
   1233 ; X32-AVX-NEXT:    retl
   1234   %out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
   1235   ret <8 x i32> %out
   1236 }
   1237 
   1238 define <16 x i16> @foldv16i16() nounwind {
   1239 ; X64-LABEL: foldv16i16:
   1240 ; X64:       # %bb.0:
   1241 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
   1242 ; X64-NEXT:    retq
   1243 ;
   1244 ; X32-AVX-LABEL: foldv16i16:
   1245 ; X32-AVX:       # %bb.0:
   1246 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
   1247 ; X32-AVX-NEXT:    retl
   1248   %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
   1249   ret <16 x i16> %out
   1250 }
   1251 
   1252 define <16 x i16> @foldv16i16u() nounwind {
   1253 ; X64-LABEL: foldv16i16u:
   1254 ; X64:       # %bb.0:
   1255 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
   1256 ; X64-NEXT:    retq
   1257 ;
   1258 ; X32-AVX-LABEL: foldv16i16u:
   1259 ; X32-AVX:       # %bb.0:
   1260 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
   1261 ; X32-AVX-NEXT:    retl
   1262   %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
   1263   ret <16 x i16> %out
   1264 }
   1265 
   1266 define <32 x i8> @foldv32i8() nounwind {
   1267 ; X64-LABEL: foldv32i8:
   1268 ; X64:       # %bb.0:
   1269 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
   1270 ; X64-NEXT:    retq
   1271 ;
   1272 ; X32-AVX-LABEL: foldv32i8:
   1273 ; X32-AVX:       # %bb.0:
   1274 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
   1275 ; X32-AVX-NEXT:    retl
   1276   %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
   1277   ret <32 x i8> %out
   1278 }
   1279 
   1280 define <32 x i8> @foldv32i8u() nounwind {
   1281 ; X64-LABEL: foldv32i8u:
   1282 ; X64:       # %bb.0:
   1283 ; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
   1284 ; X64-NEXT:    retq
   1285 ;
   1286 ; X32-AVX-LABEL: foldv32i8u:
   1287 ; X32-AVX:       # %bb.0:
   1288 ; X32-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
   1289 ; X32-AVX-NEXT:    retl
   1290   %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
   1291   ret <32 x i8> %out
   1292 }
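; For the i32, i16 and i8 fold cases the X64 and X32-AVX constant pools are
; identical: every folded element fits in 32 bits, so the 32-bit target needs
; no i64 splitting.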
   1293 
   1294 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
   1295 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
   1296 declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
   1297 declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)