; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

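; With AVX512CD, @llvm.ctlz.v8i64 should lower to a single VPLZCNTQ, which
; counts the leading zeros of each 64-bit element directly.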
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntq %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
  ret <8 x i64> %out
}

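; Same as above, but with the zero-is-undef flag set (i1 -1), permitting an
; undefined result for zero inputs. VPLZCNTQ is well defined for zero, so the
; expected lowering is unchanged.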
define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64u:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntq %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
  ret <8 x i64> %out
}

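; @llvm.ctlz.v16i32 should likewise map directly to VPLZCNTD on 32-bit elements.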
define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
  ret <16 x i32> %out
}

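; Zero-is-undef variant of testv16i32; the expected lowering is identical.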
define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32u:
; ALL:       ## BB#0:
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
  ret <16 x i32> %out
}

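; There is no 16-bit element VPLZCNT, so each <16 x i16> half is zero-extended
; to <16 x i32>, counted with VPLZCNTD, truncated back, and then corrected by
; subtracting the 16 extra leading zeros introduced by the widening
; (ctlz_i16(x) == ctlz_i32(zext(x)) - 16). Without AVX512BW (the ALL run, KNL)
; the <32 x i16> argument is already split across %ymm0/%ymm1; with AVX512BW it
; arrives in %zmm0 and the halves are extracted and reinserted with
; vextracti64x4/vinserti64x4.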
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd %ymm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpmovzxwd %ymm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdw %zmm1, %ymm1
; ALL-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
  ret <32 x i16> %out
}

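; Zero-is-undef variant of testv32i16; the widened VPLZCNTD sequence is well
; defined for zero, so the expected code is the same.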
define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16u:
; ALL:       ## BB#0:
; ALL-NEXT:    vpmovzxwd %ymm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdw %zmm0, %ymm0
; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT:    vpmovzxwd %ymm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdw %zmm1, %ymm1
; ALL-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv32i16u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT:    vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
  ret <32 x i16> %out
}

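; For <64 x i8> the same widening trick is applied per 128-bit quarter: each
; <16 x i8> piece is zero-extended to <16 x i32>, counted with VPLZCNTD,
; truncated back with VPMOVDB, and biased by subtracting 24
; (ctlz_i8(x) == ctlz_i32(zext(x)) - 24).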
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8:
; ALL:       ## BB#0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdb %zmm1, %xmm1
; ALL-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

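; Zero-is-undef variant of testv64i8; the expected code matches testv64i8.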
define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8u:
; ALL:       ## BB#0:
; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm0, %zmm0
; ALL-NEXT:    vplzcntd %zmm0, %zmm0
; ALL-NEXT:    vpmovdb %zmm0, %xmm0
; ALL-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT:    vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm2, %zmm2
; ALL-NEXT:    vplzcntd %zmm2, %zmm2
; ALL-NEXT:    vpmovdb %zmm2, %xmm2
; ALL-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT:    vpmovzxbd %xmm1, %zmm1
; ALL-NEXT:    vplzcntd %zmm1, %zmm1
; ALL-NEXT:    vpmovdb %zmm1, %xmm1
; ALL-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT:    retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW:       ## BB#0:
; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT:    vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT:    vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT:    vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)