Home | History | Annotate | Download | only in Hexagon
      1 ; REQUIRES: asserts
      2 ; RUN: llc -march=hexagon --stats -o - 2>&1 < %s | FileCheck %s
      3 
      4 ; Check that the compilation succeeded and that some code was generated.
      5 ; CHECK: vadd
      6 
      7 ; Check that the loop is pipelined and that a valid node order is used.
      8 ; CHECK-NOT: Number of node order issues found
      9 ; CHECK: Number of loops software pipelined
     10 ; CHECK-NOT: Number of node order issues found
     11 
     12 target triple = "hexagon"
     13 
     ; @fred is the input function for this test. Per the checks at the top of
     ; the file, its single-block loop (%for.body) is expected to be software
     ; pipelined without any node order issues being reported.
     ;
     ;   %p0  base pointer (i16*, readonly) from which five input pointers are derived
     ;   %p1  multiplier applied to the indices computed from %p4
     ;   %p2  not referenced anywhere in the body
     ;   %p3  output pointer (i16*), written through as <16 x i32>*
     ;   %p4  index used directly and as (%p4 + 1) srem 5 and (%p4 + 3) srem 5
     ;
     ; NOTE(review): many operands are undef, which looks like the result of
     ; automatic test-case reduction -- the arithmetic itself is presumably not
     ; meant to be meaningful; confirm before "cleaning up" any operand.
     14 define void @fred(i16* noalias nocapture readonly %p0, i32 %p1, i32 %p2, i16* noalias nocapture %p3, i32 %p4) local_unnamed_addr #1 {
     15 entry:
          ; Five input pointers derived from %p0, at i16 element offsets
          ; %p4 * %p1, ((%p4 + 1) srem 5) * %p1, 0, ((%p4 + 3) srem 5) * %p1 and 0.
     16   %mul = mul i32 %p4, %p1
     17   %add.ptr = getelementptr inbounds i16, i16* %p0, i32 %mul
     18   %add = add nsw i32 %p4, 1
     19   %rem = srem i32 %add, 5
     20   %mul1 = mul i32 %rem, %p1
     21   %add.ptr2 = getelementptr inbounds i16, i16* %p0, i32 %mul1
     22   %add.ptr6 = getelementptr inbounds i16, i16* %p0, i32 0
     23   %add7 = add nsw i32 %p4, 3
     24   %rem8 = srem i32 %add7, 5
     25   %mul9 = mul i32 %rem8, %p1
     26   %add.ptr10 = getelementptr inbounds i16, i16* %p0, i32 %mul9
     27   %add.ptr14 = getelementptr inbounds i16, i16* %p0, i32 0
          ; Advance each input pointer by 32 i16 elements and reinterpret it as
          ; <16 x i32>* (%0..%4); %5 is the output pointer %p3 reinterpreted the
          ; same way, without any offset.
     28   %incdec.ptr18 = getelementptr inbounds i16, i16* %add.ptr14, i32 32
     29   %0 = bitcast i16* %incdec.ptr18 to <16 x i32>*
     30   %incdec.ptr17 = getelementptr inbounds i16, i16* %add.ptr10, i32 32
     31   %1 = bitcast i16* %incdec.ptr17 to <16 x i32>*
     32   %incdec.ptr16 = getelementptr inbounds i16, i16* %add.ptr6, i32 32
     33   %2 = bitcast i16* %incdec.ptr16 to <16 x i32>*
     34   %incdec.ptr15 = getelementptr inbounds i16, i16* %add.ptr2, i32 32
     35   %3 = bitcast i16* %incdec.ptr15 to <16 x i32>*
     36   %incdec.ptr = getelementptr inbounds i16, i16* %add.ptr, i32 32
     37   %4 = bitcast i16* %incdec.ptr to <16 x i32>*
     38   %5 = bitcast i16* %p3 to <16 x i32>*
          ; The branch condition is undef: control may enter the loop or go
          ; straight to the exit block.
     39   br i1 undef, label %for.end.loopexit.unr-lcssa, label %for.body
     40 
     41 for.body:                                         ; preds = %for.body, %entry
          ; Loop-carried state: the output pointer, five input pointers, the
          ; previous iteration's %66 (%dVsumv1.096) and a down-counter (%niter).
          ; The back-edge value of %iptr2.099 and the entry values of
          ; %dVsumv1.096 and %niter are undef.
     42   %optr.0102 = phi <16 x i32>* [ %incdec.ptr24.3, %for.body ], [ %5, %entry ]
     43   %iptr4.0101 = phi <16 x i32>* [ %incdec.ptr23.3, %for.body ], [ %0, %entry ]
     44   %iptr3.0100 = phi <16 x i32>* [ %incdec.ptr22.3, %for.body ], [ %1, %entry ]
     45   %iptr2.099 = phi <16 x i32>* [ undef, %for.body ], [ %2, %entry ]
     46   %iptr1.098 = phi <16 x i32>* [ %incdec.ptr20.3, %for.body ], [ %3, %entry ]
     47   %iptr0.097 = phi <16 x i32>* [ %incdec.ptr19.3, %for.body ], [ %4, %entry ]
     48   %dVsumv1.096 = phi <32 x i32> [ %66, %for.body ], [ undef, %entry ]
     49   %niter = phi i32 [ %niter.nsub.3, %for.body ], [ undef, %entry ]
          ; Scalar immediates used by the intrinsic calls in this loop:
          ;   393222 = 0x00060006, 67372036 = 0x04040404, 101058054 = 0x06060606.
          ;
          ; First store: load one vector through each of the five input
          ; pointers, combine them into %14, mix in the low half of the carried
          ; %dVsumv1.096 (%15) and store the result at %optr.0102.
     50   %6 = load <16 x i32>, <16 x i32>* %iptr0.097, align 64, !tbaa !1
     51   %7 = load <16 x i32>, <16 x i32>* %iptr1.098, align 64, !tbaa !1
     52   %8 = load <16 x i32>, <16 x i32>* %iptr2.099, align 64, !tbaa !1
     53   %9 = load <16 x i32>, <16 x i32>* %iptr3.0100, align 64, !tbaa !1
     54   %10 = load <16 x i32>, <16 x i32>* %iptr4.0101, align 64, !tbaa !1
     55   %11 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %6, <16 x i32> %10)
     56   %12 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %11, <16 x i32> %8, i32 393222)
     57   %13 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %9, <16 x i32> %7)
     58   %14 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %12, <32 x i32> %13, i32 67372036)
     59   %15 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %dVsumv1.096)
     60   %16 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %14)
     61   %17 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %16, <16 x i32> %15, i32 4)
     62   %18 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %14)
     63   %19 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %16, <16 x i32> %15, i32 8)
     64   %20 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %18, <16 x i32> undef, i32 8)
     65   %21 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %17, <16 x i32> %19)
     66   %22 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %15, <16 x i32> %19)
     67   %23 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %22, <16 x i32> %17, i32 101058054)
     68   %24 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %23, <16 x i32> zeroinitializer, i32 67372036)
     69   %25 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %20)
     70   %26 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %25, <16 x i32> undef, i32 101058054)
     71   %27 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %26, <16 x i32> %21, i32 67372036)
     72   %28 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %27, <16 x i32> %24, i32 8)
     73   %incdec.ptr24 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 1
     74   store <16 x i32> %28, <16 x i32>* %optr.0102, align 64, !tbaa !1
          ; Addresses two vectors past %iptr0.097 / %iptr4.0101; they are loaded
          ; from for the third store further down.
     75   %incdec.ptr19.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 2
     76   %incdec.ptr23.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 2
          ; Second store: recomputed from the halves of %14 with no new loads;
          ; written to %incdec.ptr24 (%optr.0102 + 1).
     77   %29 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %14)
     78   %30 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %14)
     79   %31 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %29, i32 4)
     80   %32 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> undef, <16 x i32> %30, i32 4)
     81   %33 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %29, i32 8)
     82   %34 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> undef, <16 x i32> %30, i32 8)
     83   %35 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %31, <16 x i32> %33)
     84   %36 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %29, <16 x i32> %33)
     85   %37 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %36, <16 x i32> %31, i32 101058054)
     86   %38 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %37, <16 x i32> undef, i32 67372036)
     87   %39 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %30, <16 x i32> %34)
     88   %40 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %39, <16 x i32> %32, i32 101058054)
     89   %41 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %40, <16 x i32> %35, i32 67372036)
     90   %42 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %41, <16 x i32> %38, i32 8)
     91   %incdec.ptr24.1 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 2
     92   store <16 x i32> %42, <16 x i32>* %incdec.ptr24, align 64, !tbaa !1
          ; Third store: only two loads feed it (%iptr0.097 + 2 and
          ; %iptr4.0101 + 2); written to %incdec.ptr24.1 (%optr.0102 + 2).
          ; The "+ 3" addresses formed in between are used by the fourth store.
     93   %incdec.ptr19.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 3
     94   %43 = load <16 x i32>, <16 x i32>* %incdec.ptr19.1, align 64, !tbaa !1
     95   %incdec.ptr20.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr1.098, i32 3
     96   %incdec.ptr21.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr2.099, i32 3
     97   %incdec.ptr22.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr3.0100, i32 3
     98   %incdec.ptr23.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 3
     99   %44 = load <16 x i32>, <16 x i32>* %incdec.ptr23.1, align 64, !tbaa !1
    100   %45 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %43, <16 x i32> %44)
    101   %46 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %45, <16 x i32> undef, i32 393222)
    102   %47 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %46, <32 x i32> undef, i32 67372036)
    103   %48 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %47)
    104   %49 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %48, <16 x i32> undef, i32 4)
    105   %50 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %48, <16 x i32> undef, i32 8)
    106   %51 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> zeroinitializer, <16 x i32> undef)
    107   %52 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %49, <16 x i32> %50)
    108   %53 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> undef, <16 x i32> %50)
    109   %54 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %53, <16 x i32> %49, i32 101058054)
    110   %55 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %54, <16 x i32> %51, i32 67372036)
    111   %56 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %52, i32 67372036)
    112   %57 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %56, <16 x i32> %55, i32 8)
    113   %incdec.ptr24.2 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 3
    114   store <16 x i32> %57, <16 x i32>* %incdec.ptr24.1, align 64, !tbaa !1
          ; Fourth store: loads through all five input pointers at offset 3 and
          ; builds %66, which is carried into the next iteration through
          ; %dVsumv1.096; also uses the high half of %47 (%67). Written to
          ; %incdec.ptr24.2 (%optr.0102 + 3). The "+ 4" addresses formed here are
          ; the back-edge values of the pointer phis.
    115   %incdec.ptr19.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr0.097, i32 4
    116   %58 = load <16 x i32>, <16 x i32>* %incdec.ptr19.2, align 64, !tbaa !1
    117   %incdec.ptr20.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr1.098, i32 4
    118   %59 = load <16 x i32>, <16 x i32>* %incdec.ptr20.2, align 64, !tbaa !1
    119   %60 = load <16 x i32>, <16 x i32>* %incdec.ptr21.2, align 64, !tbaa !1
    120   %incdec.ptr22.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr3.0100, i32 4
    121   %61 = load <16 x i32>, <16 x i32>* %incdec.ptr22.2, align 64, !tbaa !1
    122   %incdec.ptr23.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %iptr4.0101, i32 4
    123   %62 = load <16 x i32>, <16 x i32>* %incdec.ptr23.2, align 64, !tbaa !1
    124   %63 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %58, <16 x i32> %62)
    125   %64 = tail call <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32> %63, <16 x i32> %60, i32 393222)
    126   %65 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %61, <16 x i32> %59)
    127   %66 = tail call <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32> %64, <32 x i32> %65, i32 67372036)
    128   %67 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %47)
    129   %68 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %66)
    130   %69 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %68, <16 x i32> undef, i32 4)
    131   %70 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %66)
    132   %71 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %70, <16 x i32> %67, i32 4)
    133   %72 = tail call <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32> %70, <16 x i32> %67, i32 8)
    134   %73 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %67, <16 x i32> %71)
    135   %74 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> undef, <16 x i32> %69, i32 101058054)
    136   %75 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %74, <16 x i32> %73, i32 67372036)
    137   %76 = tail call <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32> %67, <16 x i32> %72)
    138   %77 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %76, <16 x i32> %71, i32 101058054)
    139   %78 = tail call <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32> %77, <16 x i32> undef, i32 67372036)
    140   %79 = tail call <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32> %78, <16 x i32> %75, i32 8)
    141   %incdec.ptr24.3 = getelementptr inbounds <16 x i32>, <16 x i32>* %optr.0102, i32 4
    142   store <16 x i32> %79, <16 x i32>* %incdec.ptr24.2, align 64, !tbaa !1
          ; Loop control: the counter drops by 4 per iteration and the loop
          ; exits when it reaches zero.
    143   %niter.nsub.3 = add i32 %niter, -4
    144   %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0
    145   br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa, label %for.body
    146 
    147 for.end.loopexit.unr-lcssa:                       ; preds = %for.body, %entry
    148   ret void
    149 }
    150 
    ; Declarations of the llvm.hexagon.V6.* intrinsics called from @fred.
    ; The first seven return <16 x i32>, the last four return <32 x i32>;
    ; all of them carry attribute group #0.
    151 declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #0
    152 declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #0
    153 declare <16 x i32> @llvm.hexagon.V6.vaddw(<16 x i32>, <16 x i32>) #0
    154 declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #0
    155 declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #0
    156 declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #0
    157 declare <16 x i32> @llvm.hexagon.V6.vmpyiwb.acc(<16 x i32>, <16 x i32>, i32) #0
    158 declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #0
    159 declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #0
    160 declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #0
    161 declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #0
    162 
    ; #0 is attached to the intrinsic declarations (nounwind, readnone).
    ; #1 is attached to @fred: it selects the hexagonv60 CPU and enables the
    ; hvxv60 and hvx-length64b target features.
    163 attributes #0 = { nounwind readnone }
    164 attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
    165 
    ; TBAA metadata. !1 is the access tag referenced via !tbaa by every load
    ; and store in @fred; it refers to type node !2, which in turn refers to
    ; root node !3.
    166 !1 = !{!2, !2, i64 0}
    167 !2 = !{!"omnipotent char", !3, i64 0}
    168 !3 = !{!"Simple C/C++ TBAA"}
    169