; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW

      5 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
      6 ; ALL-LABEL: testv8i64:
      7 ; ALL:       ## BB#0:
      8 ; ALL-NEXT:    vplzcntq %zmm0, %zmm0
      9 ; ALL-NEXT:    retq
     10   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
     11   ret <8 x i64> %out
     12 }
     13 
     14 define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
     15 ; ALL-LABEL: testv8i64u:
     16 ; ALL:       ## BB#0:
     17 ; ALL-NEXT:    vplzcntq %zmm0, %zmm0
     18 ; ALL-NEXT:    retq
     19   %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
     20   ret <8 x i64> %out
     21 }
     22 
     23 define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
     24 ; ALL-LABEL: testv16i32:
     25 ; ALL:       ## BB#0:
     26 ; ALL-NEXT:    vplzcntd %zmm0, %zmm0
     27 ; ALL-NEXT:    retq
     28   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
     29   ret <16 x i32> %out
     30 }
     31 
     32 define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
     33 ; ALL-LABEL: testv16i32u:
     34 ; ALL:       ## BB#0:
     35 ; ALL-NEXT:    vplzcntd %zmm0, %zmm0
     36 ; ALL-NEXT:    retq
     37   %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
     38   ret <16 x i32> %out
     39 }
     40 
     41 define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
     42 ; AVX512CD-LABEL: testv32i16:
     43 ; AVX512CD:       ## BB#0:
     44 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     45 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
     46 ; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
     47 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     48 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
     49 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     50 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
     51 ; AVX512CD-NEXT:    vpmovdw %zmm1, %ymm1
     52 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
     53 ; AVX512CD-NEXT:    retq
     54 ;
     55 ; AVX512BW-LABEL: testv32i16:
     56 ; AVX512BW:       ## BB#0:
     57 ; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
     58 ; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     59 ; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
     60 ; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
     61 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     62 ; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
     63 ; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     64 ; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
     65 ; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
     66 ; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
     67 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
     68 ; AVX512BW-NEXT:    retq
     69   %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
     70   ret <32 x i16> %out
     71 }
     72 
     73 define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
     74 ; AVX512CD-LABEL: testv32i16u:
     75 ; AVX512CD:       ## BB#0:
     76 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     77 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
     78 ; AVX512CD-NEXT:    vpmovdw %zmm0, %ymm0
     79 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     80 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
     81 ; AVX512CD-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     82 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
     83 ; AVX512CD-NEXT:    vpmovdw %zmm1, %ymm1
     84 ; AVX512CD-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
     85 ; AVX512CD-NEXT:    retq
     86 ;
     87 ; AVX512BW-LABEL: testv32i16u:
     88 ; AVX512BW:       ## BB#0:
     89 ; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
     90 ; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
     91 ; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
     92 ; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
     93 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
     94 ; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
     95 ; AVX512BW-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
     96 ; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
     97 ; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
     98 ; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
     99 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    100 ; AVX512BW-NEXT:    retq
    101   %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
    102   ret <32 x i16> %out
    103 }
    104 
    105 define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
    106 ; AVX512CD-LABEL: testv64i8:
    107 ; AVX512CD:       ## BB#0:
    108 ; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm2
    109 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    110 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    111 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    112 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    113 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    114 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    115 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    116 ; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
    117 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    118 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    119 ; AVX512CD-NEXT:    vextractf128 $1, %ymm1, %xmm2
    120 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    121 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    122 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    123 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    124 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    125 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    126 ; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
    127 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    128 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    129 ; AVX512CD-NEXT:    retq
    130 ;
    131 ; AVX512BW-LABEL: testv64i8:
    132 ; AVX512BW:       ## BB#0:
    133 ; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    134 ; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
    135 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    136 ; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
    137 ; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
    138 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    139 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    140 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    141 ; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
    142 ; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
    143 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    144 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    145 ; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
    146 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    147 ; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
    148 ; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
    149 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    150 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    151 ; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
    152 ; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
    153 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    154 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    155 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    156 ; AVX512BW-NEXT:    retq
    157   %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
    158   ret <64 x i8> %out
    159 }
    160 
    161 define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
    162 ; AVX512CD-LABEL: testv64i8u:
    163 ; AVX512CD:       ## BB#0:
    164 ; AVX512CD-NEXT:    vextractf128 $1, %ymm0, %xmm2
    165 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    166 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    167 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    168 ; AVX512CD-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    169 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    170 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    171 ; AVX512CD-NEXT:    vplzcntd %zmm0, %zmm0
    172 ; AVX512CD-NEXT:    vpmovdb %zmm0, %xmm0
    173 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    174 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    175 ; AVX512CD-NEXT:    vextractf128 $1, %ymm1, %xmm2
    176 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    177 ; AVX512CD-NEXT:    vplzcntd %zmm2, %zmm2
    178 ; AVX512CD-NEXT:    vpmovdb %zmm2, %xmm2
    179 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    180 ; AVX512CD-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    181 ; AVX512CD-NEXT:    vplzcntd %zmm1, %zmm1
    182 ; AVX512CD-NEXT:    vpmovdb %zmm1, %xmm1
    183 ; AVX512CD-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    184 ; AVX512CD-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    185 ; AVX512CD-NEXT:    retq
    186 ;
    187 ; AVX512BW-LABEL: testv64i8u:
    188 ; AVX512BW:       ## BB#0:
    189 ; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
    190 ; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
    191 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    192 ; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
    193 ; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
    194 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
    195 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    196 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
    197 ; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
    198 ; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
    199 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
    200 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
    201 ; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
    202 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero
    203 ; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
    204 ; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
    205 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
    206 ; AVX512BW-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
    207 ; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
    208 ; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
    209 ; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
    210 ; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
    211 ; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
    212 ; AVX512BW-NEXT:    retq
    213   %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
    214   ret <64 x i8> %out
    215 }
    216 
    217 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
    218 declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
    219 declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
    220 declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
    221