Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn < %s | FileCheck %s
      2 
      3 ; Check that we can compile this bugpoint-reduced test without hitting an
      4 ; infinite loop in TLI.SimplifyDemandedBits() caused by a failure
      5 ; to use the return value of TLO.DAG.UpdateNodeOperands().
      6 
      7 ; Check that code was generated; we know there will be
      8 ; an s_endpgm, so check for it.
      9 
     10 @0 = external unnamed_addr addrspace(3) global [462 x float], align 4
        ; ^ Externally defined array of 462 floats in address space 3 (the AMDGPU
        ;   local/LDS address space). It is the only memory @foo loads from or
        ;   stores to.
     11 
     12 ; Function Attrs: nounwind readnone speculatable
        ; Called once in @foo's entry block; the result is masked with 15 there.
     13 declare i32 @llvm.amdgcn.workitem.id.y() #0
     14 
     15 ; Function Attrs: nounwind readnone speculatable
        ; Called once in @foo's entry block; the result seeds the bb21 loop counter.
     16 declare i32 @llvm.amdgcn.workitem.id.x() #0
     17 
     18 ; Function Attrs: nounwind readnone speculatable
        ; Called twice in @foo's bb31, the second call consuming the first's result.
     19 declare float @llvm.fmuladd.f32(float, float, float) #0
     20 
     21 ; CHECK: s_endpgm
     22 define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 {
        ; Kernel body of the reproducer. None of the four arguments is used
        ; below, and many operands and branch conditions are undef. The file's
        ; header comment describes this as a bugpoint-reduced test, so the exact
        ; instruction shape presumably matters: do not simplify or tidy this IR.
     23 bb:
     24   %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
     25   %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
        ; Keep only the low 4 bits of the y work-item id.
        ; NOTE(review): !0 requires a y work-group size of 16, so this mask
        ; presumably never clears a set bit; it may be what exercises
        ; SimplifyDemandedBits here -- confirm before relying on this.
     26   %tmp5 = and i32 %tmp, 15
     27   %tmp6 = mul nuw nsw i32 %tmp5, 21
        ; The next three instructions subtract/add zero, so %tmp9 carries the
        ; same value as %tmp6.
     28   %tmp7 = sub i32 %tmp6, 0
     29   %tmp8 = add i32 %tmp7, 0
     30   %tmp9 = add i32 %tmp8, 0
        ; Pointer to element 0 of @0; base for the store address in bb21.
     31   %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0
     32   br label %bb12
     33 
     34 bb11:                                             ; preds = %bb30
        ; Exit path: both successors end at the ret in bb38.
     35   br i1 undef, label %bb37, label %bb38
     36 
     37 bb12:                                             ; preds = %bb30, %bb
        ; Head of the outermost loop (re-entered from bb30). The condition is the
        ; constant false, so the .preheader edge is never taken.
     38   br i1 false, label %.preheader, label %.loopexit145
     39 
     40 .loopexit145:                                     ; preds = %.preheader, %bb12
     41   br label %bb13
     42 
     43 bb13:                                             ; preds = %.loopexit, %.loopexit145
        ; Loop header: %tmp14 starts at %tmp5 and advances by 16 per iteration
        ; (see %tmp20 in .loopexit).
     44   %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
     45   %tmp15 = add nsw i32 %tmp14, -3
        ; Row offset into @0 used by the store in bb21.
     46   %tmp16 = mul i32 %tmp14, 21
     47   br i1 undef, label %bb17, label %.loopexit
     48 
     49 bb17:                                             ; preds = %bb13
        ; %tmp19 has no users in this function.
     50   %tmp18 = mul i32 %tmp15, 224
     51   %tmp19 = add i32 undef, %tmp18
     52   br label %bb21
     53 
     54 .loopexit:                                        ; preds = %bb21, %bb13
        ; Latch of the bb13 loop: step the counter by 16.
     55   %tmp20 = add nuw nsw i32 %tmp14, 16
     56   br i1 undef, label %bb13, label %bb26
     57 
     58 bb21:                                             ; preds = %bb21, %bb17
        ; Inner self-loop: %tmp22 starts at the x work-item id and advances by 8.
        ; Each iteration stores an undef float to @0[%tmp22 + %tmp14 * 21].
     59   %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
     60   %tmp23 = add i32 %tmp22, %tmp16
     61   %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23
     62   store float undef, float addrspace(3)* %tmp24, align 4
     63   %tmp25 = add nuw i32 %tmp22, 8
     64   br i1 undef, label %bb21, label %.loopexit
     65 
     66 bb26:                                             ; preds = %.loopexit
     67   br label %bb31
     68 
     69 .preheader:                                       ; preds = %.preheader, %bb12
        ; Self-loop counting up by 128 from an undef start while the value is
        ; below 1568 (unsigned). Only reachable through bb12's constant-false edge.
     70   %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ]
     71   %tmp28 = add nuw i32 %tmp27, 128
     72   %tmp29 = icmp ult i32 %tmp28, 1568
     73   br i1 %tmp29, label %.preheader, label %.loopexit145
     74 
     75 bb30:                                             ; preds = %bb31
        ; Either leave via bb11 or restart the outermost loop at bb12.
     76   br i1 undef, label %bb11, label %bb12
     77 
     78 bb31:                                             ; preds = %bb31, %bb26
        ; Self-loop: the index is %tmp9 on entry from bb26 and undef on the back
        ; edge. Loads @0[%tmp32] and feeds it through two fmuladd calls; the final
        ; result %tmp36 has no users in this function.
     79   %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ]
     80   %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32
     81   %tmp34 = load float, float addrspace(3)* %tmp33, align 4
     82   %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef)
     83   %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35)
     84   br i1 undef, label %bb30, label %bb31
     85 
     86 bb37:                                             ; preds = %bb11
     87   br label %bb38
     88 
     89 bb38:                                             ; preds = %bb37, %bb11
        ; Single return; the test's FileCheck directive above @foo looks for the
        ; s_endpgm in the generated code.
     90   ret void
     91 }
     92 
     93 attributes #0 = { nounwind readnone speculatable }
        ; ^ Attribute group applied to the three intrinsic declarations above.
     94 
     95 !0 = !{i32 8, i32 16, i32 1}
        ; ^ Operand of @foo's !reqd_work_group_size metadata: three i32 values, 8, 16 and 1.
     96