; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float>)

declare float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)

declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>)

declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16>)

declare i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8>)

define i16 @test_vaddlv_s8(<8 x i8> %a) {
; CHECK: test_vaddlv_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_s16(<4 x i16> %a) {
; CHECK: test_vaddlv_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %saddlvv.i
}

define i16 @test_vaddlv_u8(<8 x i8> %a) {
; CHECK: test_vaddlv_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlv_u16(<4 x i16> %a) {
; CHECK: test_vaddlv_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a)
  ret i32 %uaddlvv.i
}

define i16 @test_vaddlvq_s8(<16 x i8> %a) {
; CHECK: test_vaddlvq_s8:
; CHECK: saddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %saddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_s16(<8 x i16> %a) {
; CHECK: test_vaddlvq_s16:
; CHECK: saddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %saddlvv.i = tail call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %saddlvv.i
}

define i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK: test_vaddlvq_s32:
; CHECK: saddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %saddlvv.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %saddlvv.i
}

define i16 @test_vaddlvq_u8(<16 x i8> %a) {
; CHECK: test_vaddlvq_u8:
; CHECK: uaddlv h{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uaddlvv.i to i16
  ret i16 %0
}

define i32 @test_vaddlvq_u16(<8 x i16> %a) {
; CHECK: test_vaddlvq_u16:
; CHECK: uaddlv s{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uaddlvv.i = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a)
  ret i32 %uaddlvv.i
}

define i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK: test_vaddlvq_u32:
; CHECK: uaddlv d{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uaddlvv.i = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a)
  ret i64 %uaddlvv.i
}

define i8 @test_vmaxv_s8(<8 x i8> %a) {
; CHECK: test_vmaxv_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_s16(<4 x i16> %a) {
; CHECK: test_vmaxv_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxv_u8(<8 x i8> %a) {
; CHECK: test_vmaxv_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxv_u16(<4 x i16> %a) {
; CHECK: test_vmaxv_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i8 @test_vmaxvq_s8(<16 x i8> %a) {
; CHECK: test_vmaxvq_s8:
; CHECK: smaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %smaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_s16(<8 x i16> %a) {
; CHECK: test_vmaxvq_s16:
; CHECK: smaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %smaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a) {
; CHECK: test_vmaxvq_s32:
; CHECK: smaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %smaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %smaxv.i
}

define i8 @test_vmaxvq_u8(<16 x i8> %a) {
; CHECK: test_vmaxvq_u8:
; CHECK: umaxv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %umaxv.i to i8
  ret i8 %0
}

define i16 @test_vmaxvq_u16(<8 x i16> %a) {
; CHECK: test_vmaxvq_u16:
; CHECK: umaxv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %umaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_u32(<4 x i32> %a) {
; CHECK: test_vmaxvq_u32:
; CHECK: umaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %umaxv.i = tail call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a)
  ret i32 %umaxv.i
}

define i8 @test_vminv_s8(<8 x i8> %a) {
; CHECK: test_vminv_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_s16(<4 x i16> %a) {
; CHECK: test_vminv_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i8 @test_vminv_u8(<8 x i8> %a) {
; CHECK: test_vminv_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminv_u16(<4 x i16> %a) {
; CHECK: test_vminv_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i8 @test_vminvq_s8(<16 x i8> %a) {
; CHECK: test_vminvq_s8:
; CHECK: sminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %sminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_s16(<8 x i16> %a) {
; CHECK: test_vminvq_s16:
; CHECK: sminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %sminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a) {
; CHECK: test_vminvq_s32:
; CHECK: sminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %sminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a)
  ret i32 %sminv.i
}

define i8 @test_vminvq_u8(<16 x i8> %a) {
; CHECK: test_vminvq_u8:
; CHECK: uminv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %uminv.i to i8
  ret i8 %0
}

define i16 @test_vminvq_u16(<8 x i16> %a) {
; CHECK: test_vminvq_u16:
; CHECK: uminv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %uminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_u32(<4 x i32> %a) {
; CHECK: test_vminvq_u32:
; CHECK: uminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %uminv.i = tail call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a)
  ret i32 %uminv.i
}

define i8 @test_vaddv_s8(<8 x i8> %a) {
; CHECK: test_vaddv_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_s16(<4 x i16> %a) {
; CHECK: test_vaddv_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddv_u8(<8 x i8> %a) {
; CHECK: test_vaddv_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.8b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddv_u16(<4 x i16> %a) {
; CHECK: test_vaddv_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.4h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i8 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK: test_vaddvq_s8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK: test_vaddvq_s16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK: test_vaddvq_s32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define i8 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK: test_vaddvq_u8:
; CHECK: addv b{{[0-9]+}}, {{v[0-9]+}}.16b
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a)
  %0 = trunc i32 %vaddv.i to i8
  ret i8 %0
}

define i16 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK: test_vaddvq_u16:
; CHECK: addv h{{[0-9]+}}, {{v[0-9]+}}.8h
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a)
  %0 = trunc i32 %vaddv.i to i16
  ret i16 %0
}

define i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK: test_vaddvq_u32:
; CHECK: addv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %vaddv.i = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a)
  ret i32 %vaddv.i
}

define float @test_vmaxvq_f32(<4 x float> %a) {
; CHECK: test_vmaxvq_f32:
; CHECK: fmaxv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminvq_f32(<4 x float> %a) {
; CHECK: test_vminvq_f32:
; CHECK: fminv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vmaxnmvq_f32(<4 x float> %a) {
; CHECK: test_vmaxnmvq_f32:
; CHECK: fmaxnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}

define float @test_vminnmvq_f32(<4 x float> %a) {
; CHECK: test_vminnmvq_f32:
; CHECK: fminnmv s{{[0-9]+}}, {{v[0-9]+}}.4s
entry:
  %0 = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a)
  ret float %0
}