Home | History | Annotate | Download | only in Hexagon
      1 ; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \
      2 ; RUN:     -hexagon-bit=0 < %s | FileCheck %s
      3 
      4 ; This spill should be eliminated.
      5 ; CHECK-NOT: vmem(r29+#6)
      6 
      7 define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 {
      8 entry:
      9   %0 = bitcast i8* %key to <32 x i32>*
     10   %1 = bitcast i8* %data1 to <32 x i32>*
     11   br label %for.body
     12 
     13 for.body:
     14   %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ]
     15   %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ]
     16   %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ]
     17   %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ]
     18   %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128
     19   %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128
     20   %4 = load <32 x i32>, <32 x i32>* undef, align 128
     21   %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2
     22   %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128
     23   %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2
     24   %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128
     25   %7 = load <32 x i32>, <32 x i32>* null, align 128
     26   %8 = load <32 x i32>, <32 x i32>* undef, align 128
     27   %9 = load <32 x i32>, <32 x i32>* null, align 128
     28   %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3
     29   %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6
     30   %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128
     31   %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4
     32   %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128
     33   %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8
     34   %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8
     35   %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128
     36   %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5
     37   %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128
     38   %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10
     39   %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10
     40   %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128
     41   %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6
     42   %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128
     43   %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12
     44   %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128
     45   %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12
     46   %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128
     47   %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14
     48   %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128
     49   %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14
     50   %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128
     51   %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11)
     52   %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2)
     53   %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11)
     54   %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3)
     55   %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef)
     56   %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15)
     57   %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7)
     58   %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15)
     59   %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8)
     60   %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16)
     61   %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9)
     62   %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17)
     63   %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13)
     64   %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4)
     65   %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13)
     66   %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5)
     67   %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef)
     68   %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6)
     69   %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14)
     70   %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef)
     71   %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer)
     72   %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10)
     73   %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18)
     74   %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef)
     75   %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19)
     76   %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26)
     77   %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21)
     78   %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26)
     79   %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23)
     80   %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28)
     81   %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24)
     82   %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30)
     83   %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27)
     84   %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22)
     85   %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27)
     86   %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef)
     87   %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31)
     88   %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39)
     89   %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33)
     90   %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39)
     91   %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35)
     92   %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37)
     93   %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40)
     94   %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36)
     95   %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44)
     96   %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58)
     97   %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46)
     98   %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48)
     99   %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50)
    100   %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59)
    101   %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer)
    102   %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer)
    103   %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55)
    104   %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef)
    105   %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64)
    106   %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67)
    107   %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef)
    108   %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72)
    109   %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer)
    110   %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537)
    111   %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537)
    112   %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537)
    113   %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537)
    114   %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537)
    115   %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537)
    116   %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537)
    117   %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537)
    118   store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128
    119   store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128
    120   store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128
    121   store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128
    122   store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128
    123   store <32 x i32> zeroinitializer, <32 x i32>* null, align 128
    124   %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16
    125   %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16
    126   br i1 false, label %for.end, label %for.body
    127 
    128 for.end:
    129   %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86)
    130   ret void
    131 }
    132 
    133 declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1
    134 
    135 declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1
    136 
    137 declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1
    138 
    139 declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1
    140 
    141 declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1
    142 
    143 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
    144 attributes #1 = { nounwind readnone }
    145