; ARM NEON saturating shift-right-and-narrow codegen tests (vqshrn / vqshrun / vqrshrn / vqrshrun).
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
; Signed saturating shift right narrow, v8i16 -> v8i8.
; Shift amount -8 appears to encode a right shift by 8 in the vqshift intrinsics
; (CHECK expects the right-shift form); selects vqshrn.s16.
define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
;CHECK: vqshrns8:
;CHECK: vqshrn.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
     10 
; Signed saturating shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqshrn.s32.
define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
;CHECK: vqshrns16:
;CHECK: vqshrn.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
     18 
; Signed saturating shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqshrn.s64.
define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
;CHECK: vqshrns32:
;CHECK: vqshrn.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
     26 
; Unsigned saturating shift right narrow, v8i16 -> v8i8 (shift by 8); selects vqshrn.u16.
define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
;CHECK: vqshrnu8:
;CHECK: vqshrn.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
     34 
; Unsigned saturating shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqshrn.u32.
define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
;CHECK: vqshrnu16:
;CHECK: vqshrn.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
     42 
; Unsigned saturating shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqshrn.u64.
define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
;CHECK: vqshrnu32:
;CHECK: vqshrn.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
     50 
; Signed-to-unsigned saturating shift right narrow, v8i16 -> v8i8 (shift by 8); selects vqshrun.s16.
define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
;CHECK: vqshruns8:
;CHECK: vqshrun.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
     58 
; Signed-to-unsigned saturating shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqshrun.s32.
define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
;CHECK: vqshruns16:
;CHECK: vqshrun.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
     66 
; Signed-to-unsigned saturating shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqshrun.s64.
define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
;CHECK: vqshruns32:
;CHECK: vqshrun.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
     74 
; Declarations for the non-rounding saturating shift-and-narrow intrinsics:
; vqshiftns = signed, vqshiftnu = unsigned, vqshiftnsu = signed input / unsigned result.
declare <8 x i8>  @llvm.arm.neon.vqshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
     86 
; Signed saturating ROUNDING shift right narrow, v8i16 -> v8i8 (shift by 8); selects vqrshrn.s16.
define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
;CHECK: vqrshrns8:
;CHECK: vqrshrn.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
     94 
; Signed saturating rounding shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqrshrn.s32.
define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
;CHECK: vqrshrns16:
;CHECK: vqrshrn.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
    102 
; Signed saturating rounding shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqrshrn.s64.
define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
;CHECK: vqrshrns32:
;CHECK: vqrshrn.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
    110 
; Unsigned saturating rounding shift right narrow, v8i16 -> v8i8 (shift by 8); selects vqrshrn.u16.
define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
;CHECK: vqrshrnu8:
;CHECK: vqrshrn.u16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
    118 
; Unsigned saturating rounding shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqrshrn.u32.
define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
;CHECK: vqrshrnu16:
;CHECK: vqrshrn.u32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
    126 
; Unsigned saturating rounding shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqrshrn.u64.
define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
;CHECK: vqrshrnu32:
;CHECK: vqrshrn.u64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
    134 
; Signed-to-unsigned saturating rounding shift right narrow, v8i16 -> v8i8 (shift by 8); selects vqrshrun.s16.
define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
;CHECK: vqrshruns8:
;CHECK: vqrshrun.s16
	%tmp1 = load <8 x i16>* %A
	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
	ret <8 x i8> %tmp2
}
    142 
; Signed-to-unsigned saturating rounding shift right narrow, v4i32 -> v4i16 (shift by 16); selects vqrshrun.s32.
define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
;CHECK: vqrshruns16:
;CHECK: vqrshrun.s32
	%tmp1 = load <4 x i32>* %A
	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
	ret <4 x i16> %tmp2
}
    150 
; Signed-to-unsigned saturating rounding shift right narrow, v2i64 -> v2i32 (shift by 32); selects vqrshrun.s64.
define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
;CHECK: vqrshruns32:
;CHECK: vqrshrun.s64
	%tmp1 = load <2 x i64>* %A
	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
	ret <2 x i32> %tmp2
}
    158 
; Declarations for the ROUNDING saturating shift-and-narrow intrinsics:
; vqrshiftns = signed, vqrshiftnu = unsigned, vqrshiftnsu = signed input / unsigned result.
declare <8 x i8>  @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone

declare <8 x i8>  @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
    170