Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
      2 ; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s
      3 
      4 ; GCN-LABEL: {{^}}test_loop:
      5 ; GCN: s_and_b64 vcc, exec, -1
      6 ; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
      7 ; GCN: ds_read_b32
      8 ; GCN: ds_write_b32
      9 ; GCN: s_cbranch_vccnz [[LABEL]]
     10 ; GCN: s_endpgm
     11 define amdgpu_kernel void @test_loop(float addrspace(3)* %ptr, i32 %n) nounwind {  ; loop is entered unless %n == -1; the loop itself has no exit edge
     12 entry:
     13   %cmp = icmp eq i32 %n, -1
     14   br i1 %cmp, label %for.exit, label %for.body  ; the return is reachable only by skipping the loop
     15 
     16 for.exit:
     17   ret void
     18 
     19 for.body:
     20   %indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]  ; counter: 0 from entry, %inc on the back edge
     21   %tmp = add i32 %indvar, 32  ; element index = counter + 32
     22   %arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
     23   %vecload = load float, float addrspace(3)* %arrayidx, align 4
     24   %add = fadd float %vecload, 1.0  ; read-modify-write: element += 1.0
     25   store float %add, float addrspace(3)* %arrayidx, align 8  ; NOTE(review): align 8 here vs align 4 on the load of the same pointer - confirm intended
     26   %inc = add i32 %indvar, 1
     27   br label %for.body  ; unconditional back edge
     28 }
     29 
     30 ; GCN-LABEL: {{^}}loop_const_true:
     31 ; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
     32 ; GCN: ds_read_b32
     33 ; GCN: ds_write_b32
     34 ; GCN: s_branch [[LABEL]]
     35 define amdgpu_kernel void @loop_const_true(float addrspace(3)* %ptr, i32 %n) nounwind {  ; loop whose back-edge condition is the constant true
     36 entry:
     37   br label %for.body
     38 
     39 for.exit:
     40   ret void
     41 
     42 for.body:
     43   %indvar = phi i32 [ %inc, %for.body ], [ 0, %entry ]  ; counter: 0 from entry, %inc on the back edge
     44   %tmp = add i32 %indvar, 32  ; element index = counter + 32
     45   %arrayidx = getelementptr float, float addrspace(3)* %ptr, i32 %tmp
     46   %vecload = load float, float addrspace(3)* %arrayidx, align 4
     47   %add = fadd float %vecload, 1.0  ; read-modify-write: element += 1.0
     48   store float %add, float addrspace(3)* %arrayidx, align 8  ; NOTE(review): align 8 here vs align 4 on the load of the same pointer - confirm intended
     49   %inc = add i32 %indvar, 1
     50   br i1 true, label %for.body, label %for.exit  ; always takes the back edge, so %for.exit is never branched to
     51 }
     52 
     53 ; GCN-LABEL: {{^}}loop_const_false:
     54 ; GCN-NOT: s_branch
     55 ; GCN: s_endpgm
     56 define amdgpu_kernel void @loop_const_false(float addrspace(3)* %ptr, i32 %n) nounwind {  ; loop whose back-edge condition is the constant false
     57 entry:
     58   br label %for.body
     59 
     60 for.exit:
     61   ret void
     62 
     63 ; XXX - Open question: should s_endpgm be emitted for this function? (The checks above currently expect it.)
     64 for.body:
     65   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]  ; counter: 0 from entry, %iv.next on the back edge
     66   %idx = add i32 %iv, 32  ; element index = counter + 32
     67   %slot = getelementptr float, float addrspace(3)* %ptr, i32 %idx
     68   %val = load float, float addrspace(3)* %slot, align 4
     69   %bumped = fadd float %val, 1.0  ; read-modify-write: element += 1.0
     70   store float %bumped, float addrspace(3)* %slot, align 8  ; NOTE(review): align 8 here vs align 4 on the load of the same pointer - confirm intended
     71   %iv.next = add i32 %iv, 1
     72   br i1 false, label %for.body, label %for.exit  ; never takes the back edge: the body runs once, then exits
     73 }
     74 
     75 ; GCN-LABEL: {{^}}loop_const_undef:
     76 ; GCN-NOT: s_branch
     77 ; GCN: s_endpgm
     78 define amdgpu_kernel void @loop_const_undef(float addrspace(3)* %ptr, i32 %n) nounwind {  ; loop whose back-edge condition is undef
     79 entry:
     80   br label %for.body
     81 
     82 for.exit:
     83   ret void
     84 
     85 ; XXX - Open question: should s_endpgm be emitted for this function? (The checks above currently expect it.)
     86 for.body:
     87   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]  ; counter: 0 from entry, %iv.next on the back edge
     88   %idx = add i32 %iv, 32  ; element index = counter + 32
     89   %slot = getelementptr float, float addrspace(3)* %ptr, i32 %idx
     90   %val = load float, float addrspace(3)* %slot, align 4
     91   %bumped = fadd float %val, 1.0  ; read-modify-write: element += 1.0
     92   store float %bumped, float addrspace(3)* %slot, align 8  ; NOTE(review): align 8 here vs align 4 on the load of the same pointer - confirm intended
     93   %iv.next = add i32 %iv, 1
     94   br i1 undef, label %for.body, label %for.exit  ; condition is undef; the checks above expect no s_branch in the output
     95 }
     96 
     97 ; GCN-LABEL: {{^}}loop_arg_0:
     98 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
     99 ; GCN: v_cmp_eq_u32{{[^,]*}}, 1,
    100 
    101 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
    102 ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80
    103 ; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 4
    104 
    105 ; GCN: s_cbranch_vccnz [[LOOPBB]]
    106 ; GCN-NEXT: ; %bb.2
    107 ; GCN-NEXT: s_endpgm
    108 define amdgpu_kernel void @loop_arg_0(float addrspace(3)* %ptr, i32 %n) nounwind {  ; loop whose back-edge condition is an i1 loaded once before the loop
    109 entry:
    110   %again = load volatile i1, i1 addrspace(3)* null  ; loop condition: volatile i1 read from the null addrspace(3) pointer
    111   br label %for.body
    112 
    113 for.exit:
    114   ret void
    115 
    116 for.body:
    117   %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]  ; counter: 0 from entry, %iv.next on the back edge
    118   %idx = add i32 %iv, 32  ; element index = counter + 32
    119   %slot = getelementptr float, float addrspace(3)* %ptr, i32 %idx
    120   %val = load float, float addrspace(3)* %slot, align 4
    121   %bumped = fadd float %val, 1.0  ; read-modify-write: element += 1.0
    122   store float %bumped, float addrspace(3)* %slot, align 8  ; NOTE(review): align 8 here vs align 4 on the load of the same pointer - confirm intended
    123   %iv.next = add i32 %iv, 1
    124   br i1 %again, label %for.body, label %for.exit  ; condition is loop-invariant: it is loaded in entry and never recomputed
    125 }
    126