; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; NB: this tests vcnt, vclz, and vcls

; Population count (llvm.ctpop) on 8 x i8; expects the D-register vcnt.8 form.
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
}

; Population count on 16 x i8; expects the Q-register vcnt.8 form.
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
}

declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone

; Count-leading-zeros tests. Every call in this group passes i1 0 as the
; second llvm.ctlz operand.

; 8 x i8, D-register vclz.i8.
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
  ret <8 x i8> %tmp2
}

; 4 x i16, D-register vclz.i16.
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
  ret <4 x i16> %tmp2
}

; 2 x i32, D-register vclz.i32.
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
  ret <2 x i32> %tmp2
}

; 1 x i64: only the function label is matched; no instruction pattern is
; required (NEON vclz has no 64-bit element form), so this just verifies that
; llc can lower the call.
define <1 x i64> @vclz64(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vclz64:
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %tmp1, i1 0)
  ret <1 x i64> %tmp2
}

; 16 x i8, Q-register vclz.i8.
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
  ret <8 x i16> %tmp2
}

; 4 x i32, Q-register vclz.i32.
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
  ret <4 x i32> %tmp2
}

; 2 x i64: only the function label is matched; no instruction pattern is
; required (NEON vclz has no 64-bit element form).
define <2 x i64> @vclzQ64(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vclzQ64:
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %tmp1, i1 0)
  ret <2 x i64> %tmp2
}

; The "b" variants below repeat the vclz tests with i1 1 as the second
; llvm.ctlz operand; the expected instructions are the same.

define <8 x i8> @vclz8b(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclz8b:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vclz16b(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclz16b:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vclz32b(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclz32b:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 1)
  ret <2 x i32> %tmp2
}

; 1 x i64: label-only test, no instruction pattern is matched.
define <1 x i64> @vclz64b(<1 x i64>* %A) nounwind {
;CHECK-LABEL: vclz64b:
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %tmp1, i1 1)
  ret <1 x i64> %tmp2
}

define <16 x i8> @vclzQ8b(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclzQ8b:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 1)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vclzQ16b(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclzQ16b:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 1)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vclzQ32b(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclzQ32b:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 1)
  ret <4 x i32> %tmp2
}

; 2 x i64: label-only test, no instruction pattern is matched.
define <2 x i64> @vclzQ64b(<2 x i64>* %A) nounwind {
;CHECK-LABEL: vclzQ64b:
  %tmp1 = load <2 x i64>, <2 x i64>* %A
  %tmp2 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %tmp1, i1 1)
  ret <2 x i64> %tmp2
}

declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <1 x i64> @llvm.ctlz.v1i64(<1 x i64>, i1) nounwind readnone

declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone

; Count-leading-sign-bits tests via the ARM-specific llvm.arm.neon.vcls
; intrinsic. The operand patterns pin the register class so the D-register and
; Q-register variants cannot be satisfied by each other's output.

define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vclss8:
;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
  ret <8 x i8> %tmp2
}

define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vclss16:
;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
  ret <4 x i16> %tmp2
}

define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vclss32:
;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}}
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
  ret <2 x i32> %tmp2
}

define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK-LABEL: vclsQs8:
;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
  ret <16 x i8> %tmp2
}

define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK-LABEL: vclsQs16:
;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
  ret <8 x i16> %tmp2
}

define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK-LABEL: vclsQs32:
;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}}
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
  ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone