; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW

define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64:
; ALL: ## BB#0:
; ALL-NEXT: vplzcntq %zmm0, %zmm0
; ALL-NEXT: retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0)
  ret <8 x i64> %out
}

define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64u:
; ALL: ## BB#0:
; ALL-NEXT: vplzcntq %zmm0, %zmm0
; ALL-NEXT: retq
  %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1)
  ret <8 x i64> %out
}

define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32:
; ALL: ## BB#0:
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0)
  ret <16 x i32> %out
}

define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; ALL-LABEL: testv16i32u:
; ALL: ## BB#0:
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: retq
  %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1)
  ret <16 x i32> %out
}

define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT: vpmovzxwd %ymm1, %zmm1
; ALL-NEXT: vplzcntd %zmm1, %zmm1
; ALL-NEXT: vpmovdw %zmm1, %ymm1
; ALL-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT: retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 0)
  ret <32 x i16> %out
}

define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; ALL-LABEL: testv32i16u:
; ALL: ## BB#0:
; ALL-NEXT: vpmovzxwd %ymm0, %zmm0
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; ALL-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; ALL-NEXT: vpmovzxwd %ymm1, %zmm1
; ALL-NEXT: vplzcntd %zmm1, %zmm1
; ALL-NEXT: vpmovdw %zmm1, %ymm1
; ALL-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; ALL-NEXT: retq
;
; AVX512BW-LABEL: testv32i16u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vpmovzxwd %ymm1, %zmm1
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdw %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512BW-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX512BW-NEXT: vpmovzxwd %ymm0, %zmm0
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpsubw %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <32 x i16> @llvm.ctlz.v32i16(<32 x i16> %in, i1 -1)
  ret <32 x i16> %out
}

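; Note on the <32 x i16> expansions above: AVX512CD provides vplzcntd and
; vplzcntq only, so there is no word-granularity leading-zero count. Each
; half of the vector is instead zero-extended to <16 x i32> (vpmovzxwd),
; counted with vplzcntd, truncated back (vpmovdw), and corrected by
; subtracting the 16 leading zeros contributed by the extension. The
; AVX512BW run differs only in that the argument arrives in a single zmm
; register, split with vextracti64x4 and reassembled with vinserti64x4.
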
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8:
; ALL: ## BB#0:
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
; ALL-NEXT: vplzcntd %zmm2, %zmm2
; ALL-NEXT: vpmovdb %zmm2, %xmm2
; ALL-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT: vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
; ALL-NEXT: vplzcntd %zmm2, %zmm2
; ALL-NEXT: vpmovdb %zmm2, %xmm2
; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT: vpmovzxbd %xmm1, %zmm1
; ALL-NEXT: vplzcntd %zmm1, %zmm1
; ALL-NEXT: vpmovdb %zmm1, %xmm1
; ALL-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT: retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; ALL-LABEL: testv64i8u:
; ALL: ## BB#0:
; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
; ALL-NEXT: vplzcntd %zmm2, %zmm2
; ALL-NEXT: vpmovdb %zmm2, %xmm2
; ALL-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT: vpmovzxbd %xmm0, %zmm0
; ALL-NEXT: vplzcntd %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; ALL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; ALL-NEXT: vextractf128 $1, %ymm1, %xmm2
; ALL-NEXT: vpmovzxbd %xmm2, %zmm2
; ALL-NEXT: vplzcntd %zmm2, %zmm2
; ALL-NEXT: vpmovdb %zmm2, %xmm2
; ALL-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; ALL-NEXT: vpmovzxbd %xmm1, %zmm1
; ALL-NEXT: vplzcntd %zmm1, %zmm1
; ALL-NEXT: vpmovdb %zmm1, %xmm1
; ALL-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; ALL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; ALL-NEXT: retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24]
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm1, %zmm1
; AVX512BW-NEXT: vplzcntd %zmm1, %zmm1
; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1
; AVX512BW-NEXT: vpsubb %xmm3, %xmm1, %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm2, %zmm2
; AVX512BW-NEXT: vplzcntd %zmm2, %zmm2
; AVX512BW-NEXT: vpmovdb %zmm2, %xmm2
; AVX512BW-NEXT: vpsubb %xmm3, %xmm2, %xmm2
; AVX512BW-NEXT: vpmovzxbd %xmm0, %zmm0
; AVX512BW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpsubb %xmm3, %xmm0, %xmm0
; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <64 x i8> @llvm.ctlz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

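; Note on the <64 x i8> expansions above: bytes are widened the same way,
; but only 16 bytes fit into one <16 x i32> zero-extension, so each 128-bit
; quarter of the vector is processed separately (vpmovzxbd / vplzcntd /
; vpmovdb), 24 is subtracted to remove the zeros added by the byte-to-dword
; extension, and the pieces are stitched back together with vinserti128.
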
declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.ctlz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.ctlz.v64i8(<64 x i8>, i1)
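
; The trailing i1 argument of llvm.ctlz selects the behaviour for a zero
; input: i1 0 requires the bit-width result (e.g. 64 for a zero i64 lane),
; while i1 -1 (the "u" tests above) makes a zero input undefined, giving
; the backend extra freedom. A minimal scalar sketch of the widening trick
; the i16/i8 tests rely on (illustration only, not checked by any RUN line;
; the function name is ours, not part of the original test):
define i16 @ctlz_i16_via_i32(i16 %x) nounwind {
  %ext = zext i16 %x to i32
  %lz32 = call i32 @llvm.ctlz.i32(i32 %ext, i1 0)
  %lz = trunc i32 %lz32 to i16
  ; Subtract the 16 leading zeros introduced by the zero-extension; for
  ; %x == 0 this still yields 16, matching the i1 0 (defined) semantics.
  %res = sub i16 %lz, 16
  ret i16 %res
}

declare i32 @llvm.ctlz.i32(i32, i1)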