; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; NB: this tests vcnt, vclz, and vcls
;
; Every function below loads one vector through %A, applies a single intrinsic
; to it and returns the result. Each instruction pattern also pins the register
; class of both operands: d-registers for the 64-bit vector types and
; q-registers for the 128-bit vector types, so a 128-bit test cannot be
; satisfied by the 64-bit form of the instruction (or the other way round).

;------------------------------------------------------------------------------
; vcnt: llvm.ctpop on vectors of i8.
;------------------------------------------------------------------------------

define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK: vcnt8:
;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
	ret <8 x i8> %tmp2
}

define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK: vcntQ8:
;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
	ret <16 x i8> %tmp2
}

declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone

;------------------------------------------------------------------------------
; vclz: llvm.ctlz on vectors of i8, i16 and i32.
; The second argument is always the constant i1 0; see the LangRef entry for
; llvm.ctlz for the meaning of that flag.
;------------------------------------------------------------------------------

define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK: vclz8:
;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK: vclz16:
;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK: vclz32:
;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
	ret <2 x i32> %tmp2
}

define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK: vclzQ8:
;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK: vclzQ16:
;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK: vclzQ32:
;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
	ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone

declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone

;------------------------------------------------------------------------------
; vcls: the ARM-specific llvm.arm.neon.vcls intrinsics on vectors of i8, i16
; and i32. The operand patterns follow the same d/q convention as the vcnt and
; vclz tests above, so the D-sized and Q-sized variants are told apart.
;------------------------------------------------------------------------------

define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK: vclss8:
;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <8 x i8>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
	ret <8 x i8> %tmp2
}

define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
;CHECK: vclss16:
;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <4 x i16>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
	ret <4 x i16> %tmp2
}

define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
;CHECK: vclss32:
;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}}
	%tmp1 = load <2 x i32>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
	ret <2 x i32> %tmp2
}

define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
;CHECK: vclsQs8:
;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <16 x i8>* %A
	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
	ret <16 x i8> %tmp2
}

define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
;CHECK: vclsQs16:
;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
	ret <8 x i16> %tmp2
}

define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
;CHECK: vclsQs32:
;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}}
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
	ret <4 x i32> %tmp2
}

declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone

declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone