; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s

; Instruction-selection tests for the AArch64 NEON intrinsics
; llvm.aarch64.neon.{sqadd,uqadd,usqadd,suqadd}.
;
; Every function below calls exactly one intrinsic and FileCheck verifies that
; the mnemonic of the same name is emitted. The Apple NEON syntax is requested
; on the command line, so vector forms are matched with their arrangement
; suffix (".8b", ".4h", ...), while scalar forms are matched by register class.
;
; A full target triple is passed (instead of only an architecture) so that the
; generated assembly does not depend on the default triple of the host that
; runs the test.

;===----------------------------------------------------------------------===;
; sqadd / uqadd, 64-bit vectors
;===----------------------------------------------------------------------===;

; llvm.aarch64.neon.sqadd on <8 x i8> must select sqadd.8b.
define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: sqadd8b:
; CHECK: sqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}

; llvm.aarch64.neon.sqadd on <4 x i16> must select sqadd.4h.
define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: sqadd4h:
; CHECK: sqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}

; llvm.aarch64.neon.sqadd on <2 x i32> must select sqadd.2s.
define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: sqadd2s:
; CHECK: sqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}

; llvm.aarch64.neon.uqadd on <8 x i8> must select uqadd.8b.
define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: uqadd8b:
; CHECK: uqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}

; llvm.aarch64.neon.uqadd on <4 x i16> must select uqadd.4h.
define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: uqadd4h:
; CHECK: uqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}

; llvm.aarch64.neon.uqadd on <2 x i32> must select uqadd.2s.
define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: uqadd2s:
; CHECK: uqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}

;===----------------------------------------------------------------------===;
; sqadd / uqadd, 128-bit vectors
;===----------------------------------------------------------------------===;

; llvm.aarch64.neon.sqadd on <16 x i8> must select sqadd.16b.
define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: sqadd16b:
; CHECK: sqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}

; llvm.aarch64.neon.sqadd on <8 x i16> must select sqadd.8h.
define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: sqadd8h:
; CHECK: sqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}

; llvm.aarch64.neon.sqadd on <4 x i32> must select sqadd.4s.
define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: sqadd4s:
; CHECK: sqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}

; llvm.aarch64.neon.sqadd on <2 x i64> must select sqadd.2d.
define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: sqadd2d:
; CHECK: sqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}

; llvm.aarch64.neon.uqadd on <16 x i8> must select uqadd.16b.
define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: uqadd16b:
; CHECK: uqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}

; llvm.aarch64.neon.uqadd on <8 x i16> must select uqadd.8h.
define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: uqadd8h:
; CHECK: uqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}

; llvm.aarch64.neon.uqadd on <4 x i32> must select uqadd.4s.
define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: uqadd4s:
; CHECK: uqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}

; llvm.aarch64.neon.uqadd on <2 x i64> must select uqadd.2d.
define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: uqadd2d:
; CHECK: uqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}

; Intrinsic declarations for sqadd / uqadd.
; NOTE: the v1i64 variants are declared but no function in this file calls them.
declare <8 x i8>  @llvm.aarch64.neon.sqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <8 x i8>  @llvm.aarch64.neon.uqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

;===----------------------------------------------------------------------===;
; usqadd, vector and scalar forms
;===----------------------------------------------------------------------===;

; llvm.aarch64.neon.usqadd on <8 x i8> must select usqadd.8b.
define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: usqadd8b:
; CHECK: usqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}

; llvm.aarch64.neon.usqadd on <4 x i16> must select usqadd.4h.
define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: usqadd4h:
; CHECK: usqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}

; llvm.aarch64.neon.usqadd on <2 x i32> must select usqadd.2s.
define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: usqadd2s:
; CHECK: usqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}

; llvm.aarch64.neon.usqadd on <16 x i8> must select usqadd.16b.
define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: usqadd16b:
; CHECK: usqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}

; llvm.aarch64.neon.usqadd on <8 x i16> must select usqadd.8h.
define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: usqadd8h:
; CHECK: usqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}

; llvm.aarch64.neon.usqadd on <4 x i32> must select usqadd.4s.
define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: usqadd4s:
; CHECK: usqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}

; llvm.aarch64.neon.usqadd on <2 x i64> must select usqadd.2d.
define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: usqadd2d:
; CHECK: usqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}

; Scalar i64 form: the expected instruction operates on d registers.
define i64 @usqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: usqadd_d:
; CHECK: usqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %l, i64 %r)
  ret i64 %sum
}

; Scalar i32 form: the expected instruction operates on s registers.
define i32 @usqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: usqadd_s:
; CHECK: usqadd {{s[0-9]+}}, {{s[0-9]+}}
  %sum = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %l, i32 %r)
  ret i32 %sum
}

; Intrinsic declarations for usqadd.
; NOTE: the v1i64 variant is declared but no function in this file calls it.
declare <8 x i8>  @llvm.aarch64.neon.usqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.usqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.usqadd.i32(i32, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

;===----------------------------------------------------------------------===;
; suqadd, vector and scalar forms
;===----------------------------------------------------------------------===;

; llvm.aarch64.neon.suqadd on <8 x i8> must select suqadd.8b.
define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: suqadd8b:
; CHECK: suqadd.8b
  %lhs = load <8 x i8>, <8 x i8>* %A
  %rhs = load <8 x i8>, <8 x i8>* %B
  %res = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}

; llvm.aarch64.neon.suqadd on <4 x i16> must select suqadd.4h.
define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: suqadd4h:
; CHECK: suqadd.4h
  %lhs = load <4 x i16>, <4 x i16>* %A
  %rhs = load <4 x i16>, <4 x i16>* %B
  %res = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}

; llvm.aarch64.neon.suqadd on <2 x i32> must select suqadd.2s.
define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: suqadd2s:
; CHECK: suqadd.2s
  %lhs = load <2 x i32>, <2 x i32>* %A
  %rhs = load <2 x i32>, <2 x i32>* %B
  %res = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}

; llvm.aarch64.neon.suqadd on <16 x i8> must select suqadd.16b.
define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: suqadd16b:
; CHECK: suqadd.16b
  %lhs = load <16 x i8>, <16 x i8>* %A
  %rhs = load <16 x i8>, <16 x i8>* %B
  %res = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}

; llvm.aarch64.neon.suqadd on <8 x i16> must select suqadd.8h.
define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: suqadd8h:
; CHECK: suqadd.8h
  %lhs = load <8 x i16>, <8 x i16>* %A
  %rhs = load <8 x i16>, <8 x i16>* %B
  %res = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}

; llvm.aarch64.neon.suqadd on <4 x i32> must select suqadd.4s.
define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: suqadd4s:
; CHECK: suqadd.4s
  %lhs = load <4 x i32>, <4 x i32>* %A
  %rhs = load <4 x i32>, <4 x i32>* %B
  %res = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}

; llvm.aarch64.neon.suqadd on <2 x i64> must select suqadd.2d.
define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
; CHECK-LABEL: suqadd2d:
; CHECK: suqadd.2d
  %lhs = load <2 x i64>, <2 x i64>* %A
  %rhs = load <2 x i64>, <2 x i64>* %B
  %res = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}

; <1 x i64> form, passed by value: the expected instruction operates on
; d registers.
define <1 x i64> @suqadd_1d(<1 x i64> %l, <1 x i64> %r) nounwind {
; CHECK-LABEL: suqadd_1d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %l, <1 x i64> %r)
  ret <1 x i64> %sum
}

; Scalar i64 form: the expected instruction operates on d registers.
define i64 @suqadd_d(i64 %l, i64 %r) nounwind {
; CHECK-LABEL: suqadd_d:
; CHECK: suqadd {{d[0-9]+}}, {{d[0-9]+}}
  %sum = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %l, i64 %r)
  ret i64 %sum
}

; Scalar i32 form: the expected instruction operates on s registers.
define i32 @suqadd_s(i32 %l, i32 %r) nounwind {
; CHECK-LABEL: suqadd_s:
; CHECK: suqadd {{s[0-9]+}}, {{s[0-9]+}}
  %sum = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %l, i32 %r)
  ret i32 %sum
}

; Intrinsic declarations for suqadd.
declare <8 x i8>  @llvm.aarch64.neon.suqadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.suqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.suqadd.i32(i32, i32) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64>, <2 x i64>) nounwind readnone