Home | History | Annotate | Download | only in Hexagon
      1 ; RUN: llc -march=hexagon -O2 -pipeliner-max-mii=10 < %s | FileCheck %s
      2 ; CHECK-NOT: vmemu
      3 
      4 ; Function Attrs: nounwind
        ; f0: two-level loop nest over 64-byte HVX vectors. Every vector load and
        ; store below is marked `align 64`; the CHECK-NOT at the top of the file
        ; rejects any `vmemu` in the llc output (presumably the unaligned HVX
        ; access form -- confirm against the Hexagon HVX manual).
        ;   %a0      - input byte pointer; its cursor (%v10) advances 64 bytes per
        ;              outer iteration.
        ;   %a1      - multiplied by the inner index (-4..4) to form a byte offset
        ;              from the cursor (looks like a row stride -- TODO confirm).
        ;   %a2      - outer trip count is %a2 / 64; both loops are skipped unless
        ;              %a2 > 63.
        ;   %a3      - fed through S2.vsplatrb + V6.lvsplatw to build %v1, the
        ;              right-hand operand of every V6.vgtub compare.
        ;   %a4, %a5 - output pointers; two vectors are stored through each per
        ;              outer iteration.
      5 define void @f0(i8* nocapture readonly %a0, i32 %a1, i32 %a2, i32 %a3, i16* nocapture %a4, i16* nocapture %a5) #0 {
        ; Entry: build the loop-invariant vectors (%v1, %v2, %v3) and skip
        ; everything when %a2 < 64.
      6 b0:
      7   %v0 = tail call i32 @llvm.hexagon.S2.vsplatrb(i32 %a3)
      8   %v1 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 %v0)
      9   %v2 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 16843009)
     10   %v3 = tail call <16 x i32> @llvm.hexagon.V6.vd0()
     11   %v4 = sdiv i32 %a2, 64
     12   %v5 = icmp sgt i32 %a2, 63
     13   br i1 %v5, label %b1, label %b6
     14 
        ; Preheader: view both output pointers as vector pointers; %v8 is the
        ; initial value of the wide accumulator %v16 in the inner loop.
     15 b1:                                               ; preds = %b0
     16   %v6 = bitcast i16* %a5 to <16 x i32>*
     17   %v7 = bitcast i16* %a4 to <16 x i32>*
     18   %v8 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v3, <16 x i32> %v3)
     19   br label %b2
     20 
        ; Outer loop header: %v9 counts iterations up to %v4; %v10, %v11 and %v12
        ; are the input and the two output cursors. %v14 is the vector at the
        ; current input position, against which every inner-loop vector is compared.
     21 b2:                                               ; preds = %b4, %b1
     22   %v9 = phi i32 [ 0, %b1 ], [ %v100, %b4 ]
     23   %v10 = phi i8* [ %a0, %b1 ], [ %v87, %b4 ]
     24   %v11 = phi <16 x i32>* [ %v6, %b1 ], [ %v99, %b4 ]
     25   %v12 = phi <16 x i32>* [ %v7, %b1 ], [ %v95, %b4 ]
     26   %v13 = bitcast i8* %v10 to <16 x i32>*
     27   %v14 = load <16 x i32>, <16 x i32>* %v13, align 64, !tbaa !0
     28   br label %b3
     29 
        ; Inner loop: %v15 runs from -4 through 4 (exit once %v83 == 5). Each pass
        ; loads three vectors at byte offsets %v18-64, %v18 and %v18+64 from %v10
        ; (with %v18 = %v15 * %a1), all marked align 64, and derives eight further
        ; vectors from them with V6.vlalignbi / V6.valignbi using immediates 1..4.
        ; For each of the nine vectors: vabsdiffub against %v14, vgtub against
        ; %v1, then the resulting predicate drives a vmux (feeding the %v16
        ; accumulator chain via vmpybus.acc / vmpabus.acc) and a vaddbnq (feeding
        ; the %v17 accumulator chain).
     30 b3:                                               ; preds = %b3, %b2
     31   %v15 = phi i32 [ -4, %b2 ], [ %v83, %b3 ]
     32   %v16 = phi <32 x i32> [ %v8, %b2 ], [ %v78, %b3 ]
     33   %v17 = phi <16 x i32> [ %v3, %b2 ], [ %v82, %b3 ]
     34   %v18 = mul nsw i32 %v15, %a1
     35   %v19 = getelementptr inbounds i8, i8* %v10, i32 %v18
     36   %v20 = bitcast i8* %v19 to <16 x i32>*
     37   %v21 = add i32 %v18, -64
     38   %v22 = getelementptr inbounds i8, i8* %v10, i32 %v21
     39   %v23 = bitcast i8* %v22 to <16 x i32>*
     40   %v24 = load <16 x i32>, <16 x i32>* %v23, align 64, !tbaa !0
     41   %v25 = load <16 x i32>, <16 x i32>* %v20, align 64, !tbaa !0
     42   %v26 = add i32 %v18, 64
     43   %v27 = getelementptr inbounds i8, i8* %v10, i32 %v26
     44   %v28 = bitcast i8* %v27 to <16 x i32>*
     45   %v29 = load <16 x i32>, <16 x i32>* %v28, align 64, !tbaa !0
     46   %v30 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v25, <16 x i32> %v14)
     47   %v31 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v30, <16 x i32> %v1)
     48   %v32 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v31, <16 x i32> %v3, <16 x i32> %v25)
     49   %v33 = tail call <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32> %v16, <16 x i32> %v32, i32 16843009)
     50   %v34 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v31, <16 x i32> %v17, <16 x i32> %v2)
     51   %v35 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 1)
     52   %v36 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 1)
     53   %v37 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 2)
     54   %v38 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 2)
     55   %v39 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v35, <16 x i32> %v14)
     56   %v40 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v36, <16 x i32> %v14)
     57   %v41 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v37, <16 x i32> %v14)
     58   %v42 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v38, <16 x i32> %v14)
     59   %v43 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v39, <16 x i32> %v1)
     60   %v44 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v40, <16 x i32> %v1)
     61   %v45 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v41, <16 x i32> %v1)
     62   %v46 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v42, <16 x i32> %v1)
     63   %v47 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v43, <16 x i32> %v3, <16 x i32> %v35)
     64   %v48 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v44, <16 x i32> %v3, <16 x i32> %v36)
     65   %v49 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v45, <16 x i32> %v3, <16 x i32> %v37)
     66   %v50 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v46, <16 x i32> %v3, <16 x i32> %v38)
     67   %v51 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v48, <16 x i32> %v47)
     68   %v52 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v33, <32 x i32> %v51, i32 16843009)
     69   %v53 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v50, <16 x i32> %v49)
     70   %v54 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v52, <32 x i32> %v53, i32 16843009)
     71   %v55 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v43, <16 x i32> %v34, <16 x i32> %v2)
     72   %v56 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v44, <16 x i32> %v55, <16 x i32> %v2)
     73   %v57 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v45, <16 x i32> %v56, <16 x i32> %v2)
     74   %v58 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v46, <16 x i32> %v57, <16 x i32> %v2)
     75   %v59 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 3)
     76   %v60 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 3)
     77   %v61 = tail call <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32> %v25, <16 x i32> %v24, i32 4)
     78   %v62 = tail call <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32> %v29, <16 x i32> %v25, i32 4)
     79   %v63 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v59, <16 x i32> %v14)
     80   %v64 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v60, <16 x i32> %v14)
     81   %v65 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v61, <16 x i32> %v14)
     82   %v66 = tail call <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32> %v62, <16 x i32> %v14)
     83   %v67 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v63, <16 x i32> %v1)
     84   %v68 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v64, <16 x i32> %v1)
     85   %v69 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v65, <16 x i32> %v1)
     86   %v70 = tail call <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32> %v66, <16 x i32> %v1)
     87   %v71 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v67, <16 x i32> %v3, <16 x i32> %v59)
     88   %v72 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v68, <16 x i32> %v3, <16 x i32> %v60)
     89   %v73 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v69, <16 x i32> %v3, <16 x i32> %v61)
     90   %v74 = tail call <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1> %v70, <16 x i32> %v3, <16 x i32> %v62)
     91   %v75 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v72, <16 x i32> %v71)
     92   %v76 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v54, <32 x i32> %v75, i32 16843009)
     93   %v77 = tail call <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32> %v74, <16 x i32> %v73)
     94   %v78 = tail call <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32> %v76, <32 x i32> %v77, i32 16843009)
     95   %v79 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v67, <16 x i32> %v58, <16 x i32> %v2)
     96   %v80 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v68, <16 x i32> %v79, <16 x i32> %v2)
     97   %v81 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v69, <16 x i32> %v80, <16 x i32> %v2)
     98   %v82 = tail call <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1> %v70, <16 x i32> %v81, <16 x i32> %v2)
     99   %v83 = add nsw i32 %v15, 1
    100   %v84 = icmp eq i32 %v83, 5
    101   br i1 %v84, label %b4, label %b3
    102 
        ; Outer latch: the wide accumulator (%v86) goes through hi/lo + vshuffvdd
        ; and its two halves are stored at %v12 and %v12+1; the other accumulator
        ; (%v85) goes through vunpackub and its two halves are stored at %v11 and
        ; %v11+1. All four stores are marked align 64. The input cursor then
        ; advances 64 bytes and each output cursor advances two vectors.
    103 b4:                                               ; preds = %b3
    104   %v85 = phi <16 x i32> [ %v82, %b3 ]
    105   %v86 = phi <32 x i32> [ %v78, %b3 ]
    106   %v87 = getelementptr inbounds i8, i8* %v10, i32 64
    107   %v88 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v86)
    108   %v89 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v86)
    109   %v90 = tail call <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32> %v88, <16 x i32> %v89, i32 -2)
    110   %v91 = tail call <32 x i32> @llvm.hexagon.V6.vunpackub(<16 x i32> %v85)
    111   %v92 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v90)
    112   %v93 = getelementptr inbounds <16 x i32>, <16 x i32>* %v12, i32 1
    113   store <16 x i32> %v92, <16 x i32>* %v12, align 64, !tbaa !0
    114   %v94 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v90)
    115   %v95 = getelementptr inbounds <16 x i32>, <16 x i32>* %v12, i32 2
    116   store <16 x i32> %v94, <16 x i32>* %v93, align 64, !tbaa !0
    117   %v96 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v91)
    118   %v97 = getelementptr inbounds <16 x i32>, <16 x i32>* %v11, i32 1
    119   store <16 x i32> %v96, <16 x i32>* %v11, align 64, !tbaa !0
    120   %v98 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v91)
    121   %v99 = getelementptr inbounds <16 x i32>, <16 x i32>* %v11, i32 2
    122   store <16 x i32> %v98, <16 x i32>* %v97, align 64, !tbaa !0
    123   %v100 = add nsw i32 %v9, 1
    124   %v101 = icmp slt i32 %v100, %v4
    125   br i1 %v101, label %b2, label %b5
    126 
        ; Dedicated loop-exit block; only forwards to the common return.
    127 b5:                                               ; preds = %b4
    128   br label %b6
    129 
    130 b6:                                               ; preds = %b5, %b0
    131   ret void
    132 }
    133 
    134 ; Function Attrs: nounwind readnone
    135 declare <16 x i32> @llvm.hexagon.V6.lvsplatw(i32) #1
    136 
    137 ; Function Attrs: nounwind readnone
    138 declare i32 @llvm.hexagon.S2.vsplatrb(i32) #1
    139 
    140 ; Function Attrs: nounwind readnone
    141 declare <16 x i32> @llvm.hexagon.V6.vd0() #1
    142 
    143 ; Function Attrs: nounwind readnone
    144 declare <32 x i32> @llvm.hexagon.V6.vcombine(<16 x i32>, <16 x i32>) #1
    145 
    146 ; Function Attrs: nounwind readnone
    147 declare <16 x i32> @llvm.hexagon.V6.vabsdiffub(<16 x i32>, <16 x i32>) #1
    148 
    149 ; Function Attrs: nounwind readnone
    150 declare <512 x i1> @llvm.hexagon.V6.vgtub(<16 x i32>, <16 x i32>) #1
    151 
    152 ; Function Attrs: nounwind readnone
    153 declare <16 x i32> @llvm.hexagon.V6.vmux(<512 x i1>, <16 x i32>, <16 x i32>) #1
    154 
    155 ; Function Attrs: nounwind readnone
    156 declare <32 x i32> @llvm.hexagon.V6.vmpybus.acc(<32 x i32>, <16 x i32>, i32) #1
    157 
    158 ; Function Attrs: nounwind readnone
    159 declare <16 x i32> @llvm.hexagon.V6.vaddbnq(<512 x i1>, <16 x i32>, <16 x i32>) #1
    160 
    161 ; Function Attrs: nounwind readnone
    162 declare <16 x i32> @llvm.hexagon.V6.vlalignbi(<16 x i32>, <16 x i32>, i32) #1
    163 
    164 ; Function Attrs: nounwind readnone
    165 declare <16 x i32> @llvm.hexagon.V6.valignbi(<16 x i32>, <16 x i32>, i32) #1
    166 
    167 ; Function Attrs: nounwind readnone
    168 declare <32 x i32> @llvm.hexagon.V6.vmpabus.acc(<32 x i32>, <32 x i32>, i32) #1
    169 
    170 ; Function Attrs: nounwind readnone
    171 declare <32 x i32> @llvm.hexagon.V6.vshuffvdd(<16 x i32>, <16 x i32>, i32) #1
    172 
    173 ; Function Attrs: nounwind readnone
    174 declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1
    175 
    176 ; Function Attrs: nounwind readnone
    177 declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1
    178 
    179 ; Function Attrs: nounwind readnone
    180 declare <32 x i32> @llvm.hexagon.V6.vunpackub(<16 x i32>) #1
    181 
    182 ; Function Attrs: nounwind
        ; f1: single-block loop over 64-byte HVX vectors. Every vector load and
        ; store below is marked `align 64`, so this function is also covered by
        ; the file-level CHECK-NOT for `vmemu`.
        ;   %a0, %a1 - inputs; two consecutive vectors are read from each per
        ;              iteration and paired through V6.vmpyuhv.
        ;   %a2      - input; two vectors per iteration, each passed to V6.vaddhw
        ;              together with %v0, the splat of 983055 (0x000F000F).
        ;   %a3      - trip count is %a3 / 64; the loop is skipped unless %a3 > 63.
        ;   %a4      - output; one vector (the V6.vpackhub.sat result) is stored
        ;              per iteration.
    183 define void @f1(i16* nocapture readonly %a0, i16* nocapture readonly %a1, i16* nocapture readonly %a2, i32 %a3, i8* nocapture %a4) #0 {
        ; Entry: build the loop-invariant splat and skip the loop when %a3 < 64.
    184 b0:
    185   %v0 = tail call <16 x i32> @llvm.hexagon.V6.lvsplatw(i32 983055)
    186   %v1 = sdiv i32 %a3, 64
    187   %v2 = icmp sgt i32 %a3, 63
    188   br i1 %v2, label %b1, label %b4
    189 
        ; Preheader: view all four pointers as pointers to 64-byte vectors.
    190 b1:                                               ; preds = %b0
    191   %v3 = bitcast i8* %a4 to <16 x i32>*
    192   %v4 = bitcast i16* %a1 to <16 x i32>*
    193   %v5 = bitcast i16* %a2 to <16 x i32>*
    194   %v6 = bitcast i16* %a0 to <16 x i32>*
    195   br label %b2
    196 
        ; Loop body: %v7 counts iterations up to %v1. The same sequence is applied
        ; to the first vector of each input (result %v26) and then to the second
        ; (result %v41): vmpyuhv of the %a0/%a1 vectors, vaddhw of the %a2 vector
        ; with %v0, vlsrwv of the lo halves and of the hi halves, then vshufeh of
        ; the two shifted vectors. %v26 and %v41 are combined by vpackhub.sat and
        ; stored through %v8. Input cursors advance two vectors per iteration,
        ; the output cursor one.
    197 b2:                                               ; preds = %b2, %b1
    198   %v7 = phi i32 [ 0, %b1 ], [ %v44, %b2 ]
    199   %v8 = phi <16 x i32>* [ %v3, %b1 ], [ %v43, %b2 ]
    200   %v9 = phi <16 x i32>* [ %v4, %b1 ], [ %v29, %b2 ]
    201   %v10 = phi <16 x i32>* [ %v5, %b1 ], [ %v32, %b2 ]
    202   %v11 = phi <16 x i32>* [ %v6, %b1 ], [ %v27, %b2 ]
    203   %v12 = getelementptr inbounds <16 x i32>, <16 x i32>* %v11, i32 1
    204   %v13 = load <16 x i32>, <16 x i32>* %v11, align 64, !tbaa !0
    205   %v14 = getelementptr inbounds <16 x i32>, <16 x i32>* %v9, i32 1
    206   %v15 = load <16 x i32>, <16 x i32>* %v9, align 64, !tbaa !0
    207   %v16 = tail call <32 x i32> @llvm.hexagon.V6.vmpyuhv(<16 x i32> %v13, <16 x i32> %v15)
    208   %v17 = getelementptr inbounds <16 x i32>, <16 x i32>* %v10, i32 1
    209   %v18 = load <16 x i32>, <16 x i32>* %v10, align 64, !tbaa !0
    210   %v19 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %v18, <16 x i32> %v0)
    211   %v20 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v16)
    212   %v21 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v19)
    213   %v22 = tail call <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32> %v20, <16 x i32> %v21)
    214   %v23 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v16)
    215   %v24 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v19)
    216   %v25 = tail call <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32> %v23, <16 x i32> %v24)
    217   %v26 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v25, <16 x i32> %v22)
    218   %v27 = getelementptr inbounds <16 x i32>, <16 x i32>* %v11, i32 2
    219   %v28 = load <16 x i32>, <16 x i32>* %v12, align 64, !tbaa !0
    220   %v29 = getelementptr inbounds <16 x i32>, <16 x i32>* %v9, i32 2
    221   %v30 = load <16 x i32>, <16 x i32>* %v14, align 64, !tbaa !0
    222   %v31 = tail call <32 x i32> @llvm.hexagon.V6.vmpyuhv(<16 x i32> %v28, <16 x i32> %v30)
    223   %v32 = getelementptr inbounds <16 x i32>, <16 x i32>* %v10, i32 2
    224   %v33 = load <16 x i32>, <16 x i32>* %v17, align 64, !tbaa !0
    225   %v34 = tail call <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32> %v33, <16 x i32> %v0)
    226   %v35 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v31)
    227   %v36 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %v34)
    228   %v37 = tail call <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32> %v35, <16 x i32> %v36)
    229   %v38 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v31)
    230   %v39 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %v34)
    231   %v40 = tail call <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32> %v38, <16 x i32> %v39)
    232   %v41 = tail call <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32> %v40, <16 x i32> %v37)
    233   %v42 = tail call <16 x i32> @llvm.hexagon.V6.vpackhub.sat(<16 x i32> %v41, <16 x i32> %v26)
    234   %v43 = getelementptr inbounds <16 x i32>, <16 x i32>* %v8, i32 1
    235   store <16 x i32> %v42, <16 x i32>* %v8, align 64, !tbaa !0
    236   %v44 = add nsw i32 %v7, 1
    237   %v45 = icmp slt i32 %v44, %v1
    238   br i1 %v45, label %b2, label %b3
    239 
        ; Dedicated loop-exit block; only forwards to the common return.
    240 b3:                                               ; preds = %b2
    241   br label %b4
    242 
    243 b4:                                               ; preds = %b3, %b0
    244   ret void
    245 }
    246 
    247 ; Function Attrs: nounwind readnone
    248 declare <32 x i32> @llvm.hexagon.V6.vmpyuhv(<16 x i32>, <16 x i32>) #1
    249 
    250 ; Function Attrs: nounwind readnone
    251 declare <32 x i32> @llvm.hexagon.V6.vaddhw(<16 x i32>, <16 x i32>) #1
    252 
    253 ; Function Attrs: nounwind readnone
    254 declare <16 x i32> @llvm.hexagon.V6.vlsrwv(<16 x i32>, <16 x i32>) #1
    255 
    256 ; Function Attrs: nounwind readnone
    257 declare <16 x i32> @llvm.hexagon.V6.vshufeh(<16 x i32>, <16 x i32>) #1
    258 
    259 ; Function Attrs: nounwind readnone
    260 declare <16 x i32> @llvm.hexagon.V6.vpackhub.sat(<16 x i32>, <16 x i32>) #1
    261 
    262 attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" }
    263 attributes #1 = { nounwind readnone }
    264 
    265 !0 = !{!1, !1, i64 0}
    266 !1 = !{!"omnipotent char", !2, i64 0}
    267 !2 = !{!"Simple C/C++ TBAA"}
    268