; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
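; Check that the AArch64 NEON halving add intrinsics (llvm.aarch64.neon.shadd/uhadd)
; and their rounding counterparts (srhadd/urhadd) are matched to the corresponding
; instructions for each legal vector type (8b, 16b, 4h, 8h, 2s and 4s).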

define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: shadd8b:
;CHECK: shadd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: shadd16b:
;CHECK: shadd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: shadd4h:
;CHECK: shadd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: shadd8h:
;CHECK: shadd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: shadd2s:
;CHECK: shadd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: shadd4s:
;CHECK: shadd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: uhadd8b:
;CHECK: uhadd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uhadd16b:
;CHECK: uhadd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: uhadd4h:
;CHECK: uhadd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uhadd8h:
;CHECK: uhadd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: uhadd2s:
;CHECK: uhadd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uhadd4s:
;CHECK: uhadd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

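; Rounding halving adds: srhadd/urhadd compute (a + b + 1) >> 1 per lane,
; whereas shadd/uhadd above truncate ((a + b) >> 1).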
define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: srhadd8b:
;CHECK: srhadd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: srhadd16b:
;CHECK: srhadd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: srhadd4h:
;CHECK: srhadd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: srhadd8h:
;CHECK: srhadd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: srhadd2s:
;CHECK: srhadd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: srhadd4s:
;CHECK: srhadd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK-LABEL: urhadd8b:
;CHECK: urhadd.8b
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: urhadd16b:
;CHECK: urhadd.16b
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK-LABEL: urhadd4h:
;CHECK: urhadd.4h
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: urhadd8h:
;CHECK: urhadd.8h
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK-LABEL: urhadd2s:
;CHECK: urhadd.2s
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: urhadd4s:
;CHECK: urhadd.4s
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.urhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone