Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 ; NB: this tests vcnt, vclz, and vcls
      3 
      4 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { ; population count on a <8 x i8> vector loaded from %A
      5 ;CHECK: vcnt8:
      6 ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
      7 	%vec = load <8 x i8>* %A ; input vector read through the pointer argument
      8 	%bits = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %vec) ; expected to select vcnt.8 with d-register operands
      9 	ret <8 x i8> %bits ; return the ctpop result
     10 }
     11 
     12 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { ; population count on a <16 x i8> vector loaded from %A
     13 ;CHECK: vcntQ8:
     14 ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
     15 	%vec = load <16 x i8>* %A ; input vector read through the pointer argument
     16 	%bits = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %vec) ; expected to select vcnt.8 with q-register operands
     17 	ret <16 x i8> %bits ; return the ctpop result
     18 }
     19 
     20 declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
     21 declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
     22 
     23 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { ; count-leading-zeros on a <8 x i8> vector loaded from %A
     24 ;CHECK: vclz8:
     25 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
     26 	%vec = load <8 x i8>* %A ; input vector read through the pointer argument
     27 	%lz = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i8 on d registers
     28 	ret <8 x i8> %lz ; return the ctlz result
     29 }
     30 
     31 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { ; count-leading-zeros on a <4 x i16> vector loaded from %A
     32 ;CHECK: vclz16:
     33 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
     34 	%vec = load <4 x i16>* %A ; input vector read through the pointer argument
     35 	%lz = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i16 on d registers
     36 	ret <4 x i16> %lz ; return the ctlz result
     37 }
     38 
     39 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { ; count-leading-zeros on a <2 x i32> vector loaded from %A
     40 ;CHECK: vclz32:
     41 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
     42 	%vec = load <2 x i32>* %A ; input vector read through the pointer argument
     43 	%lz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i32 on d registers
     44 	ret <2 x i32> %lz ; return the ctlz result
     45 }
     46 
     47 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { ; count-leading-zeros on a <16 x i8> vector loaded from %A
     48 ;CHECK: vclzQ8:
     49 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
     50 	%vec = load <16 x i8>* %A ; input vector read through the pointer argument
     51 	%lz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i8 on q registers
     52 	ret <16 x i8> %lz ; return the ctlz result
     53 }
     54 
     55 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { ; count-leading-zeros on a <8 x i16> vector loaded from %A
     56 ;CHECK: vclzQ16:
     57 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
     58 	%vec = load <8 x i16>* %A ; input vector read through the pointer argument
     59 	%lz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i16 on q registers
     60 	ret <8 x i16> %lz ; return the ctlz result
     61 }
     62 
     63 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { ; count-leading-zeros on a <4 x i32> vector loaded from %A
     64 ;CHECK: vclzQ32:
     65 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
     66 	%vec = load <4 x i32>* %A ; input vector read through the pointer argument
     67 	%lz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %vec, i1 false) ; i1 false is the second ctlz operand (zero-is-undef flag per LangRef); expected to select vclz.i32 on q registers
     68 	ret <4 x i32> %lz ; return the ctlz result
     69 }
     70 
     71 declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
     72 declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
     73 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
     74 
     75 declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
     76 declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
     77 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
     78 
     79 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { ; NEON vcls intrinsic on a <8 x i8> vector loaded from %A
     80 ;CHECK: vclss8:
     81 ;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}}
     82 	%tmp1 = load <8 x i8>* %A ; input vector read through the pointer argument
     83 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) ; pattern above now also requires d-register operands, matching the vcnt/vclz tests
     84 	ret <8 x i8> %tmp2 ; return the intrinsic result
     85 }
     86 
     87 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { ; NEON vcls intrinsic on a <4 x i16> vector loaded from %A
     88 ;CHECK: vclss16:
     89 ;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}}
     90 	%tmp1 = load <4 x i16>* %A ; input vector read through the pointer argument
     91 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) ; pattern above now also requires d-register operands, matching the vcnt/vclz tests
     92 	ret <4 x i16> %tmp2 ; return the intrinsic result
     93 }
     94 
     95 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { ; NEON vcls intrinsic on a <2 x i32> vector loaded from %A
     96 ;CHECK: vclss32:
     97 ;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}}
     98 	%tmp1 = load <2 x i32>* %A ; input vector read through the pointer argument
     99 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) ; pattern above now also requires d-register operands, matching the vcnt/vclz tests
    100 	ret <2 x i32> %tmp2 ; return the intrinsic result
    101 }
    102 
    103 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { ; NEON vcls intrinsic on a <16 x i8> vector loaded from %A
    104 ;CHECK: vclsQs8:
    105 ;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}}
    106 	%tmp1 = load <16 x i8>* %A ; input vector read through the pointer argument
    107 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) ; pattern above now requires q-register operands so the d-register form cannot satisfy it
    108 	ret <16 x i8> %tmp2 ; return the intrinsic result
    109 }
    110 
    111 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { ; NEON vcls intrinsic on a <8 x i16> vector loaded from %A
    112 ;CHECK: vclsQs16:
    113 ;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}}
    114 	%tmp1 = load <8 x i16>* %A ; input vector read through the pointer argument
    115 	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) ; pattern above now requires q-register operands so the d-register form cannot satisfy it
    116 	ret <8 x i16> %tmp2 ; return the intrinsic result
    117 }
    118 
    119 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { ; NEON vcls intrinsic on a <4 x i32> vector loaded from %A
    120 ;CHECK: vclsQs32:
    121 ;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}}
    122 	%tmp1 = load <4 x i32>* %A ; input vector read through the pointer argument
    123 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) ; pattern above now requires q-register operands so the d-register form cannot satisfy it
    124 	ret <4 x i32> %tmp2 ; return the intrinsic result
    125 }
    126 
    127 declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
    128 declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
    129 declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
    130 
    131 declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
    132 declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
    133 declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
    134