Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
      2 ; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s
      3 
      4 ; SI-LABEL: {{^}}infinite_loop:
      5 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
      6 ; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
      7 ; SI: s_waitcnt lgkmcnt(0)
      8 ; SI: buffer_store_dword [[REG]]
      9 ; SI: s_branch [[LOOP]]
     10 define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
        ; Kernel whose only loop block branches back to itself; the function
        ; contains no ret instruction.
     11 entry:
     12   br label %loop
     13 
     14 loop:
        ; 999 is the decimal form of the 0x3e7 immediate matched by the checks
        ; above this function.
     15   store volatile i32 999, i32 addrspace(1)* %out, align 4
     16   br label %loop                ; unconditional back-edge, no exit edge
     17 }
     18 
     19 
     20 ; IR-LABEL: @infinite_loop_ret(
     21 ; IR:  br i1 %cond, label %loop, label %UnifiedReturnBlock
     22 
     23 ; IR: loop:
     24 ; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
     25 ; IR: br i1 true, label %loop, label %UnifiedReturnBlock
     26 
     27 ; IR: UnifiedReturnBlock:
     28 ; IR:  ret void
     29 
     30 
     31 ; SI-LABEL: {{^}}infinite_loop_ret:
     32 ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
     33 
     34 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
     35 ; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
     36 ; SI: s_and_b64 vcc, exec, -1
     37 ; SI: s_waitcnt lgkmcnt(0)
     38 ; SI: buffer_store_dword [[REG]]
     39 ; SI: s_cbranch_vccnz [[LOOP]]
     40 
     41 ; SI: [[RET]]:  ; %UnifiedReturnBlock
     42 ; SI: s_endpgm
     43 define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
        ; Kernel with two paths out of entry: one enters a block that loops on
        ; itself forever, the other returns immediately.
     44 entry:
     45   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
     46   %cond = icmp eq i32 %tmp, 1   ; true only when the intrinsic result equals 1
     47   br i1 %cond, label %loop, label %return
     48 
     49 loop:
        ; In this input the block has no exit edge; the checks above with the
        ; IR prefix expect the pass under test to add one to UnifiedReturnBlock.
     50   store volatile i32 999, i32 addrspace(1)* %out, align 4
     51   br label %loop
     52 
     53 return:
     54   ret void
     55 }
     56 
     57 
     58 ; IR-LABEL: @infinite_loops(
     59 ; IR: br i1 undef, label %loop1, label %loop2
     60 
     61 ; IR: loop1:
     62 ; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
     63 ; IR: br i1 true, label %loop1, label %DummyReturnBlock
     64 
     65 ; IR: loop2:
     66 ; IR: store volatile i32 888, i32 addrspace(1)* %out, align 4
     67 ; IR: br i1 true, label %loop2, label %DummyReturnBlock
     68 
     69 ; IR: DummyReturnBlock:
     70 ; IR: ret void
     71 
     72 
     73 ; SI-LABEL: {{^}}infinite_loops:
     74 
     75 ; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7
     76 ; SI: s_and_b64 vcc, exec, -1
     77 
     78 ; SI: [[LOOP1:BB[0-9]+_[0-9]+]]:  ; %loop1
     79 ; SI: s_waitcnt lgkmcnt(0)
     80 ; SI: buffer_store_dword [[REG1]]
     81 ; SI: s_cbranch_vccnz [[LOOP1]]
     82 ; SI: s_branch [[RET:BB[0-9]+_[0-9]+]]
     83 
     84 ; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378
     85 ; SI: s_and_b64 vcc, exec, -1
     86 
     87 ; SI: [[LOOP2:BB[0-9]+_[0-9]+]]:  ; %loop2
     88 ; SI: s_waitcnt lgkmcnt(0)
     89 ; SI: buffer_store_dword [[REG2]]
     90 ; SI: s_cbranch_vccnz [[LOOP2]]
     91 
     92 ; SI: [[RET]]:  ; %DummyReturnBlock
     93 ; SI: s_endpgm
     94 define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
        ; Kernel with two independent self-looping blocks and no ret
        ; instruction anywhere in the function.
     95 entry:
     96   br i1 undef, label %loop1, label %loop2   ; branch condition is undef
     97 
     98 loop1:
        ; Stores 999 (0x3e7 in the checks above) and loops on itself.
     99   store volatile i32 999, i32 addrspace(1)* %out, align 4
    100   br label %loop1
    101 
    102 loop2:
        ; Stores 888 (0x378 in the checks above) and loops on itself.
    103   store volatile i32 888, i32 addrspace(1)* %out, align 4
    104   br label %loop2
    105 }
    106 
    107 
    108 
    109 ; IR-LABEL: @infinite_loop_nest_ret(
    110 ; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock
    111 
    112 ; IR: outer_loop:
    113 ; IR: br label %inner_loop
    114 
    115 ; IR: inner_loop:
    116 ; IR: store volatile i32 999, i32 addrspace(1)* %out, align 4
    117 ; IR: %cond3 = icmp eq i32 %tmp, 3
    118 ; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock
    119 
    120 ; IR: TransitionBlock:
    121 ; IR: br i1 %cond3, label %inner_loop, label %outer_loop
    122 
    123 ; IR: UnifiedReturnBlock:
    124 ; IR: ret void
    125 
    126 ; SI-LABEL: {{^}}infinite_loop_nest_ret:
    127 ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
    128 
    129 ; SI: s_mov_b32
    130 ; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]:  ; %outer_loop
    131 
    132 ; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]:  ; %inner_loop
    133 ; SI: s_waitcnt expcnt(0)
    134 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
    135 ; SI: s_waitcnt lgkmcnt(0)
    136 ; SI: buffer_store_dword [[REG]]
    137 
    138 ; SI: s_andn2_b64 exec
    139 ; SI: s_cbranch_execnz [[INNER_LOOP]]
    140 
    141 ; SI: s_andn2_b64 exec
    142 ; SI: s_cbranch_execnz [[OUTER_LOOP]]
    143 
    144 ; SI: [[RET]]:  ; %UnifiedReturnBlock
    145 ; SI: s_endpgm
    146 define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
        ; Kernel with a two-level loop nest that has no exit edge; the return
        ; block is reachable only directly from entry.
    147 entry:
    148   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
    149   %cond1 = icmp eq i32 %tmp, 1
    150   br i1 %cond1, label %outer_loop, label %return
    151 
    152 outer_loop:
        ; The next two lines are commented-out IR and are not part of the test
        ; input; only the unconditional branch to inner_loop is active.
    153  ; %cond2 = icmp eq i32 %tmp, 2
    154  ; br i1 %cond2, label %outer_loop, label %inner_loop
    155  br label %inner_loop
    156 
    157 inner_loop:                                     ; preds = %outer_loop, %inner_loop
    158   store volatile i32 999, i32 addrspace(1)* %out, align 4
    159   %cond3 = icmp eq i32 %tmp, 3
        ; Both successors stay inside the nest: back to inner_loop when %cond3
        ; is true, otherwise back to outer_loop.
    160   br i1 %cond3, label %inner_loop, label %outer_loop
    161 
    162 return:
    163   ret void
    164 }
    165 
    166 declare i32 @llvm.amdgcn.workitem.id.x()   ; called by @infinite_loop_ret and @infinite_loop_nest_ret to produce %tmp
    167