Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; both 64-bit <8 x i8> operands are read through %A and %B
      4 ;CHECK: vhadds8:
      5 ;CHECK: vhadd.s8
      6 	%lhs = load <8 x i8>* %A
      7 	%rhs = load <8 x i8>* %B
      8 	%res = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; llc output must contain vhadd.s8
      9 	ret <8 x i8> %res
     10 }
     11 
     12 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; both 64-bit <4 x i16> operands are read through %A and %B
     13 ;CHECK: vhadds16:
     14 ;CHECK: vhadd.s16
     15 	%lhs = load <4 x i16>* %A
     16 	%rhs = load <4 x i16>* %B
     17 	%res = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; llc output must contain vhadd.s16
     18 	ret <4 x i16> %res
     19 }
     20 
     21 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; both 64-bit <2 x i32> operands are read through %A and %B
     22 ;CHECK: vhadds32:
     23 ;CHECK: vhadd.s32
     24 	%lhs = load <2 x i32>* %A
     25 	%rhs = load <2 x i32>* %B
     26 	%res = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; llc output must contain vhadd.s32
     27 	ret <2 x i32> %res
     28 }
     29 
     30 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; both 64-bit <8 x i8> operands are read through %A and %B
     31 ;CHECK: vhaddu8:
     32 ;CHECK: vhadd.u8
     33 	%lhs = load <8 x i8>* %A
     34 	%rhs = load <8 x i8>* %B
     35 	%res = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; llc output must contain vhadd.u8
     36 	ret <8 x i8> %res
     37 }
     38 
     39 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; both 64-bit <4 x i16> operands are read through %A and %B
     40 ;CHECK: vhaddu16:
     41 ;CHECK: vhadd.u16
     42 	%lhs = load <4 x i16>* %A
     43 	%rhs = load <4 x i16>* %B
     44 	%res = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; llc output must contain vhadd.u16
     45 	ret <4 x i16> %res
     46 }
     47 
     48 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; both 64-bit <2 x i32> operands are read through %A and %B
     49 ;CHECK: vhaddu32:
     50 ;CHECK: vhadd.u32
     51 	%lhs = load <2 x i32>* %A
     52 	%rhs = load <2 x i32>* %B
     53 	%res = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; llc output must contain vhadd.u32
     54 	ret <2 x i32> %res
     55 }
     56 
     57 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; both 128-bit <16 x i8> operands are read through %A and %B
     58 ;CHECK: vhaddQs8:
     59 ;CHECK: vhadd.s8
     60 	%lhs = load <16 x i8>* %A
     61 	%rhs = load <16 x i8>* %B
     62 	%res = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; llc output must contain vhadd.s8
     63 	ret <16 x i8> %res
     64 }
     65 
     66 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; both 128-bit <8 x i16> operands are read through %A and %B
     67 ;CHECK: vhaddQs16:
     68 ;CHECK: vhadd.s16
     69 	%lhs = load <8 x i16>* %A
     70 	%rhs = load <8 x i16>* %B
     71 	%res = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; llc output must contain vhadd.s16
     72 	ret <8 x i16> %res
     73 }
     74 
     75 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; both 128-bit <4 x i32> operands are read through %A and %B
     76 ;CHECK: vhaddQs32:
     77 ;CHECK: vhadd.s32
     78 	%lhs = load <4 x i32>* %A
     79 	%rhs = load <4 x i32>* %B
     80 	%res = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; llc output must contain vhadd.s32
     81 	ret <4 x i32> %res
     82 }
     83 
     84 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; both 128-bit <16 x i8> operands are read through %A and %B
     85 ;CHECK: vhaddQu8:
     86 ;CHECK: vhadd.u8
     87 	%lhs = load <16 x i8>* %A
     88 	%rhs = load <16 x i8>* %B
     89 	%res = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; llc output must contain vhadd.u8
     90 	ret <16 x i8> %res
     91 }
     92 
     93 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; both 128-bit <8 x i16> operands are read through %A and %B
     94 ;CHECK: vhaddQu16:
     95 ;CHECK: vhadd.u16
     96 	%lhs = load <8 x i16>* %A
     97 	%rhs = load <8 x i16>* %B
     98 	%res = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; llc output must contain vhadd.u16
     99 	ret <8 x i16> %res
    100 }
    101 
    102 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; both 128-bit <4 x i32> operands are read through %A and %B
    103 ;CHECK: vhaddQu32:
    104 ;CHECK: vhadd.u32
    105 	%lhs = load <4 x i32>* %A
    106 	%rhs = load <4 x i32>* %B
    107 	%res = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; llc output must contain vhadd.u32
    108 	ret <4 x i32> %res
    109 }
    110 
    111 declare <8 x i8>  @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @vhadds8, which expects vhadd.s8
    112 declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @vhadds16, which expects vhadd.s16
    113 declare <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @vhadds32, which expects vhadd.s32
    114 
    115 declare <8 x i8>  @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @vhaddu8, which expects vhadd.u8
    116 declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @vhaddu16, which expects vhadd.u16
    117 declare <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @vhaddu32, which expects vhadd.u32
    118 
    119 declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @vhaddQs8, which expects vhadd.s8
    120 declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @vhaddQs16, which expects vhadd.s16
    121 declare <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @vhaddQs32, which expects vhadd.s32
    122 
    123 declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @vhaddQu8, which expects vhadd.u8
    124 declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @vhaddQu16, which expects vhadd.u16
    125 declare <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @vhaddQu32, which expects vhadd.u32
    126 
    127 define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; both 64-bit <8 x i8> operands are read through %A and %B
    128 ;CHECK: vrhadds8:
    129 ;CHECK: vrhadd.s8
    130 	%lhs = load <8 x i8>* %A
    131 	%rhs = load <8 x i8>* %B
    132 	%res = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; llc output must contain vrhadd.s8
    133 	ret <8 x i8> %res
    134 }
    135 
    136 define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; both 64-bit <4 x i16> operands are read through %A and %B
    137 ;CHECK: vrhadds16:
    138 ;CHECK: vrhadd.s16
    139 	%lhs = load <4 x i16>* %A
    140 	%rhs = load <4 x i16>* %B
    141 	%res = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; llc output must contain vrhadd.s16
    142 	ret <4 x i16> %res
    143 }
    144 
    145 define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; both 64-bit <2 x i32> operands are read through %A and %B
    146 ;CHECK: vrhadds32:
    147 ;CHECK: vrhadd.s32
    148 	%lhs = load <2 x i32>* %A
    149 	%rhs = load <2 x i32>* %B
    150 	%res = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; llc output must contain vrhadd.s32
    151 	ret <2 x i32> %res
    152 }
    153 
    154 define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ; both 64-bit <8 x i8> operands are read through %A and %B
    155 ;CHECK: vrhaddu8:
    156 ;CHECK: vrhadd.u8
    157 	%lhs = load <8 x i8>* %A
    158 	%rhs = load <8 x i8>* %B
    159 	%res = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ; llc output must contain vrhadd.u8
    160 	ret <8 x i8> %res
    161 }
    162 
    163 define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; both 64-bit <4 x i16> operands are read through %A and %B
    164 ;CHECK: vrhaddu16:
    165 ;CHECK: vrhadd.u16
    166 	%lhs = load <4 x i16>* %A
    167 	%rhs = load <4 x i16>* %B
    168 	%res = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ; llc output must contain vrhadd.u16
    169 	ret <4 x i16> %res
    170 }
    171 
    172 define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; both 64-bit <2 x i32> operands are read through %A and %B
    173 ;CHECK: vrhaddu32:
    174 ;CHECK: vrhadd.u32
    175 	%lhs = load <2 x i32>* %A
    176 	%rhs = load <2 x i32>* %B
    177 	%res = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ; llc output must contain vrhadd.u32
    178 	ret <2 x i32> %res
    179 }
    180 
    181 define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; both 128-bit <16 x i8> operands are read through %A and %B
    182 ;CHECK: vrhaddQs8:
    183 ;CHECK: vrhadd.s8
    184 	%lhs = load <16 x i8>* %A
    185 	%rhs = load <16 x i8>* %B
    186 	%res = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; llc output must contain vrhadd.s8
    187 	ret <16 x i8> %res
    188 }
    189 
    190 define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; both 128-bit <8 x i16> operands are read through %A and %B
    191 ;CHECK: vrhaddQs16:
    192 ;CHECK: vrhadd.s16
    193 	%lhs = load <8 x i16>* %A
    194 	%rhs = load <8 x i16>* %B
    195 	%res = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; llc output must contain vrhadd.s16
    196 	ret <8 x i16> %res
    197 }
    198 
    199 define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; both 128-bit <4 x i32> operands are read through %A and %B
    200 ;CHECK: vrhaddQs32:
    201 ;CHECK: vrhadd.s32
    202 	%lhs = load <4 x i32>* %A
    203 	%rhs = load <4 x i32>* %B
    204 	%res = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; llc output must contain vrhadd.s32
    205 	ret <4 x i32> %res
    206 }
    207 
    208 define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind { ; both 128-bit <16 x i8> operands are read through %A and %B
    209 ;CHECK: vrhaddQu8:
    210 ;CHECK: vrhadd.u8
    211 	%lhs = load <16 x i8>* %A
    212 	%rhs = load <16 x i8>* %B
    213 	%res = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ; llc output must contain vrhadd.u8
    214 	ret <16 x i8> %res
    215 }
    216 
    217 define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind { ; both 128-bit <8 x i16> operands are read through %A and %B
    218 ;CHECK: vrhaddQu16:
    219 ;CHECK: vrhadd.u16
    220 	%lhs = load <8 x i16>* %A
    221 	%rhs = load <8 x i16>* %B
    222 	%res = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ; llc output must contain vrhadd.u16
    223 	ret <8 x i16> %res
    224 }
    225 
    226 define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind { ; both 128-bit <4 x i32> operands are read through %A and %B
    227 ;CHECK: vrhaddQu32:
    228 ;CHECK: vrhadd.u32
    229 	%lhs = load <4 x i32>* %A
    230 	%rhs = load <4 x i32>* %B
    231 	%res = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ; llc output must contain vrhadd.u32
    232 	ret <4 x i32> %res
    233 }
    234 
    235 declare <8 x i8>  @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @vrhadds8, which expects vrhadd.s8
    236 declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @vrhadds16, which expects vrhadd.s16
    237 declare <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @vrhadds32, which expects vrhadd.s32
    238 
    239 declare <8 x i8>  @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; called by @vrhaddu8, which expects vrhadd.u8
    240 declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone ; called by @vrhaddu16, which expects vrhadd.u16
    241 declare <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone ; called by @vrhaddu32, which expects vrhadd.u32
    242 
    243 declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @vrhaddQs8, which expects vrhadd.s8
    244 declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @vrhaddQs16, which expects vrhadd.s16
    245 declare <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @vrhaddQs32, which expects vrhadd.s32
    246 
    247 declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; called by @vrhaddQu8, which expects vrhadd.u8
    248 declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone ; called by @vrhaddQu16, which expects vrhadd.u16
    249 declare <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone ; called by @vrhaddQu32, which expects vrhadd.u32
    250