; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: ; mask branch [[ENDIF]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.outer.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.outer.then, %bb.inner.then, %bb
  ret void
}

; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.inner.end, label %bb.inner.then

bb.inner.then:                                    ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 1
  %tmp8 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 1, i32 addrspace(1)* %tmp8, align 4
  br label %bb.inner.end

bb.inner.end:                                     ; preds = %bb.inner.then, %bb.outer.then
  %tmp9 = add i32 %tmp, 2
  %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp9
  store i32 2, i32 addrspace(1)* %tmp10, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb.inner.end, %bb
  ret void
}

; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: ; mask branch [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_INNER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_INNER3:s\[[0-9:]+\]]], [[SAVEEXEC_INNER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_INNER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %tmp2 = icmp ugt i32 %tmp, 1
  br i1 %tmp2, label %bb.outer.then, label %bb.outer.end

bb.outer.then:                                    ; preds = %bb
  %tmp5 = icmp eq i32 %tmp, 2
  br i1 %tmp5, label %bb.then, label %bb.else

bb.then:                                          ; preds = %bb.outer.then
  %tmp3 = add i32 %tmp, 1
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp3
  store i32 1, i32 addrspace(1)* %tmp4, align 4
  br label %bb.outer.end

bb.else:                                          ; preds = %bb.outer.then
  %tmp7 = add i32 %tmp, 2
  %tmp9 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp7
  store i32 2, i32 addrspace(1)* %tmp9, align 4
  br label %bb.outer.end

bb.outer.end:                                     ; preds = %bb, %bb.then, %bb.else
  ret void
}

; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: ; mask branch [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN-NEXT: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp1, align 4
  %cc1 = icmp ugt i32 %tmp, 1
  br i1 %cc1, label %bb.outer.then, label %bb.outer.else

bb.outer.then:
  %tmp2 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 1
  store i32 1, i32 addrspace(1)* %tmp2, align 4
  %cc2 = icmp eq i32 %tmp, 2
  br i1 %cc2, label %bb.inner.then, label %bb.outer.end

bb.inner.then:
  %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 2
  store i32 2, i32 addrspace(1)* %tmp3, align 4
  br label %bb.outer.end

bb.outer.else:
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 3
  store i32 3, i32 addrspace(1)* %tmp4, align 4
  %cc3 = icmp eq i32 %tmp, 2
  br i1 %cc3, label %bb.inner.then2, label %bb.outer.end

bb.inner.then2:
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %tmp1, i32 4
  store i32 4, i32 addrspace(1)* %tmp5, align 4
  br label %bb.outer.end

bb.outer.end:
  ret void
}

; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = icmp ugt i32 %tmp, 1
  br i1 %tmp1, label %bb.then, label %bb.end

bb.then:                                          ; preds = %bb
  %tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
  store i32 0, i32 addrspace(1)* %tmp4, align 4
  br label %bb.end

bb.end:                                           ; preds = %bb.then, %bb
  call void @llvm.amdgcn.s.barrier()
  ret void
}

; Make sure scc liveness is updated if s_or_b64 is removed
; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
; GCN: s_and_b64 exec, exec, {{vcc|s\[[0-9:]+\]}}

; GCN-NOT: s_or_b64 exec, exec

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execnz

; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: buffer_store_dword
; GCN: s_setpc_b64
define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
bb:
  br label %bb1

bb1:                                              ; preds = %Flow1, %bb1, %bb
  %tmp = icmp slt i32 %arg, 519
  br i1 %tmp, label %bb2, label %bb1

bb2:                                              ; preds = %bb1
  %tmp3 = icmp eq i32 %arg, 0
  br i1 %tmp3, label %bb4, label %bb10

bb4:                                              ; preds = %bb2
  %tmp6 = load float, float addrspace(5)* undef
  %tmp7 = fcmp olt float %tmp6, 0.0
  br i1 %tmp7, label %bb8, label %Flow

bb8:                                              ; preds = %bb4
  %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1
  br label %Flow

Flow:                                             ; preds = %bb8, %bb4
  %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ]
  br label %bb10

bb10:                                             ; preds = %Flow, %bb2
  %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ]
  br i1 %tmp3, label %bb12, label %Flow1

Flow1:                                            ; preds = %bb10
  br label %bb1

bb12:                                             ; preds = %bb10
  store volatile <4 x float> %tmp11, <4 x float> addrspace(5)* undef, align 16
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }
attributes #2 = { nounwind }