; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

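; The nested-if tests below check that adjacent exec-mask restores collapse:
; when an inner endif falls straight through to the outer one, only a single
; (or, before s_endpgm, no) s_or_b64 restore should remain. In this first
; case both restores are expected to vanish, and the inner if can use a
; plain s_and_b64 on exec because the saved mask is never read back.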
; GCN-LABEL: {{^}}simple_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN:      s_and_b64 exec, exec, vcc
; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ret void
}

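; Negative case: the store in bb.inner.end sits between the inner and outer
; endif, so the inner s_or_b64 restore of SAVEEXEC_INNER must be kept; only
; the outer restore, which would immediately precede s_endpgm, goes away.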
; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  ret void
}

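; An if/else nested inside an if. The else arm still needs the usual
; s_or_saveexec/s_xor exec flip at THEN_INNER, but the final exec restore
; before s_endpgm is again expected to be elided.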
; GCN-LABEL: {{^}}nested_if_if_else:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                       ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                             ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                             ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                        ; preds = %bb, %bb.then, %bb.else
  ret void
}

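; An if nested in each arm of an outer if/else. The restore at
; THEN_OUTER_FLOW has to stay because the exec flip into the second outer
; arm follows it, while the inner endif of that second arm collapses
; straight into ENDIF_OUTER and s_endpgm.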
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  ret void
}

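; The restore of SAVEEXEC must survive here: dropping it would run s_barrier
; with a partially disabled exec mask, which is presumably why the test is
; named "unsafe". The s_or_b64 is required even though s_endpgm follows.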
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN:      s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN:      store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN:      s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; Make sure scc liveness is updated if the s_or_b64 exec restore is removed
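; (presumably so that -verify-machineinstrs on the RUN line does not report
; stale SCC liveness after the restore is deleted)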
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #2 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }