Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s
      2 
      ; add_B: horizontal add of the <16 x i8> vector loaded from %arr.
      ; The total is built by repeatedly moving the upper half of the still-live
      ; lanes down onto the lower half and adding (16 -> 8 -> 4 -> 2 -> 1 lanes);
      ; lane 0 of the final vector is returned. The remaining shuffle lanes are
      ; undef because their values are never read.
      3 define i8 @add_B(<16 x i8>* %arr)  {
      4 ; CHECK-LABEL: add_B
      ; Expected output: an addv that writes a b-register from a .16b operand.
      5 ; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b
      6   %bin.rdx = load <16 x i8>, <16 x i8>* %arr
          ; Step 1: lanes 8..15 are moved to lanes 0..7 and added.
      7   %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
      8   %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0
          ; Step 2: lanes 4..7 are moved to lanes 0..3 and added.
      9   %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
     10   %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf
          ; Step 3: lanes 2..3 are moved to lanes 0..1 and added.
     11   %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
     12   %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12
          ; Step 4: lane 1 is moved to lane 0 and added; lane 0 now holds the sum.
     13   %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
     14   %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13
     15   %r = extractelement <16 x i8> %bin.rdx14, i32 0
     16   ret i8 %r
     17 }
     18 
     ; add_H: horizontal add of the <8 x i16> vector loaded from %arr.
     ; Three fold steps (8 -> 4 -> 2 -> 1 lanes) leave the total in lane 0,
     ; which is extracted and returned.
     19 define i16 @add_H(<8 x i16>* %arr)  {
     20 ; CHECK-LABEL: add_H
     ; Expected output: an addv that writes an h-register from a .8h operand.
     21 ; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h
     22   %bin.rdx = load <8 x i16>, <8 x i16>* %arr
          ; Step 1: lanes 4..7 are moved to lanes 0..3 and added.
     23   %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef>
     24   %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf
          ; Step 2: lanes 2..3 are moved to lanes 0..1 and added.
     25   %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     26   %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12
          ; Step 3: lane 1 is moved to lane 0 and added; lane 0 now holds the sum.
     27   %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     28   %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13
     29   %r = extractelement <8 x i16> %bin.rdx14, i32 0
     30   ret i16 %r
     31 }
     32 
     ; add_S: horizontal add of the <4 x i32> vector loaded from %arr.
     ; Two fold steps (4 -> 2 -> 1 lanes) leave the total in lane 0,
     ; which is extracted and returned.
     33 define i32 @add_S( <4 x i32>* %arr)  {
     34 ; CHECK-LABEL: add_S
     ; Expected output: an addv that writes an s-register from a .4s operand.
     35 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
     36   %bin.rdx = load <4 x i32>, <4 x i32>* %arr
          ; Step 1: lanes 2..3 are moved to lanes 0..1 and added.
     37   %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
     38   %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf
          ; Step 2: lane 1 is moved to lane 0 and added; lane 0 now holds the sum.
     39   %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
     40   %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12
     41   %r = extractelement <4 x i32> %bin.rdx13, i32 0
     42   ret i32 %r
     43 }
     44 
     ; add_D: horizontal add of the <2 x i64> vector loaded from %arr.
     ; A single fold step adds lane 1 onto lane 0, and lane 0 is returned.
     ; Unlike the other element widths in this file, this is a negative test:
     ; the directive below requires that no addv appears for this function.
     ; NOTE(review): presumably because ADDV has no two-lane 64-bit (.2d)
     ; arrangement -- confirm against the Arm ISA reference.
     45 define i64 @add_D(<2 x i64>* %arr)  {
     46 ; CHECK-LABEL: add_D
     47 ; CHECK-NOT: addv
     48   %bin.rdx = load <2 x i64>, <2 x i64>* %arr
          ; Only step: lane 1 is moved to lane 0 and added.
     49   %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
     50   %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0
     51   %r = extractelement <2 x i64> %bin.rdx0, i32 0
     52   ret i64 %r
     53 }
     54 
     ; oversized_ADDV_256: sum of per-lane absolute differences of two 8-byte
     ; vectors read from %arg1 and %arg2.
     ; The bytes are zero-extended to <8 x i32> (8 * 32 = 256 bits), so the
     ; reduction operates on a vector twice as wide as the .4s operand named in
     ; the expected pattern. Three fold steps (8 -> 4 -> 2 -> 1 lanes) leave the
     ; total in lane 0, which is returned.
     55 define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) {
     56 ; CHECK-LABEL: oversized_ADDV_256
     ; Expected output: an addv that writes an s-register from a .4s operand.
     57 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
     58 entry:
          ; Load both inputs as <8 x i8> (byte-aligned) and widen each lane to i32.
     59   %0 = bitcast i8* %arg1 to <8 x i8>*
     60   %1 = load <8 x i8>, <8 x i8>* %0, align 1
     61   %2 = zext <8 x i8> %1 to <8 x i32>
     62   %3 = bitcast i8* %arg2 to <8 x i8>*
     63   %4 = load <8 x i8>, <8 x i8>* %3, align 1
     64   %5 = zext <8 x i8> %4 to <8 x i32>
          ; Per-lane absolute value of the difference: where the difference is
          ; negative, its negation (0 - difference) is selected instead.
     65   %6 = sub nsw <8 x i32> %2, %5
     66   %7 = icmp slt <8 x i32> %6, zeroinitializer
     67   %8 = sub nsw <8 x i32> zeroinitializer, %6
     68   %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6
          ; Step 1: lanes 4..7 are moved to lanes 0..3 and added.
     69   %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
     70   %bin.rdx = add <8 x i32> %9, %rdx.shuf
          ; Step 2: lanes 2..3 are moved to lanes 0..1 and added.
     71   %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     72   %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1
          ; Step 3: lane 1 is moved to lane 0 and added; lane 0 now holds the sum.
     73   %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     74   %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3
     75   %10 = extractelement <8 x i32> %bin.rdx4, i32 0
     76   ret i32 %10
     77 }
     78 
     ; oversized_ADDV_512: horizontal add of the <16 x i32> vector loaded from
     ; %arr (16 * 32 = 512 bits, four times the width of the .4s operand named in
     ; the expected pattern). Four fold steps (16 -> 8 -> 4 -> 2 -> 1 lanes) leave
     ; the total in lane 0, which is extracted and returned.
     79 define i32 @oversized_ADDV_512(<16 x i32>* %arr)  {
     80 ; CHECK-LABEL: oversized_ADDV_512
     ; Expected output: an addv that writes an s-register from a .4s operand.
     81 ; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s
     82   %bin.rdx = load <16 x i32>, <16 x i32>* %arr
     83 
          ; Step 1: lanes 8..15 are moved to lanes 0..7 and added.
     84   %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
     85   %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0
     86 
          ; Step 2: lanes 4..7 are moved to lanes 0..3 and added.
     87   %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef >
     88   %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf
     89 
          ; Step 3: lanes 2..3 are moved to lanes 0..1 and added.
     90   %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
     91   %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12
     92 
          ; Step 4: lane 1 is moved to lane 0 and added; lane 0 now holds the sum.
     93   %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef, i32 undef, i32 undef,i32 undef, i32 undef>
     94   %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13
     95 
     96   %r = extractelement <16 x i32> %bin.rdx14, i32 0
     97   ret i32 %r
     98 }
     99