; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s

; SI-LABEL: {{^}}uniform_if_scc:
; SI-DAG: s_cmp_eq_i32 s{{[0-9]+}}, 0
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; SI-LABEL: {{^}}uniform_if_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; SI-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; SI-DAG: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
; also scheduled the write first.
; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
; SI-DAG: s_and_b64 vcc, exec, [[COND]]
; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; SI: v_mov_b32_e32 [[STORE_VAL]], 1

; SI: [[IF_LABEL]]:
; SI: buffer_store_dword [[STORE_VAL]]
define void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; SI-LABEL: {{^}}uniform_if_move_valu:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_ne_i32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; SI-LABEL: {{^}}uniform_if_move_valu_commute:
; SI: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; SI: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; SI: s_and_b64 vcc, exec, [[COND]]
; SI: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: buffer_store_dword
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}


; SI-LABEL: {{^}}uniform_if_else_ret:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: s_endpgm

; SI: {{^}}[[IF_LABEL]]:
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: s_endpgm
define void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else: ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end: ; preds = %if.else, %if.then
  ret void
}

; SI-LABEL: {{^}}uniform_if_else:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]

; SI: [[IF_LABEL]]:
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]

; SI: [[ENDIF_LABEL]]:
; SI: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; SI: buffer_store_dword [[THREE]]
; SI: s_endpgm
define void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then: ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else: ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end: ; preds = %if.else, %if.then
  store i32 3, i32 addrspace(1)* %out1
  ret void
}

; SI-LABEL: {{^}}icmp_2_users:
; SI: s_cmp_lt_i32 s{{[0-9]+}}, 1
; SI: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
; SI: buffer_store_dword
; SI: [[LABEL]]:
; SI: s_endpgm
define void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF: ; preds = %IF, %main_body
  ret void
}

; SI-LABEL: {{^}}icmp_users_different_blocks:
; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
; SI: s_and_b64 vcc, exec, [[MASK]]
; SI: s_cbranch_vccnz [[EXIT]]
; SI: buffer_store
; SI: {{^}}[[EXIT]]:
; SI: s_endpgm
define void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7: ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9: ; preds = %bb7, %bb2, %bb
  ret void
}

; SI-LABEL: {{^}}uniform_loop:
; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; FIXME: We need to teach SIFixSGPRCopies about uniform branches so we
; get s_add_i32 here.
; SI: v_add_i32_e32 [[I:v[0-9]+]], vcc, -1, v{{[0-9]+}}
; SI: v_cmp_ne_i32_e32 vcc, 0, [[I]]
; SI: s_and_b64 vcc, exec, vcc
; SI: s_cbranch_vccnz [[LOOP_LABEL]]
; SI: s_endpgm
define void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test mixing uniform and divergent control flow.

; SI-LABEL: {{^}}uniform_inside_divergent:
; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: s_cbranch_execz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: s_cmp_lg_i32 {{s[0-9]+}}, 0
; SI: s_cbranch_scc1 [[ENDIF_LABEL]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
define void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; SI-LABEL: {{^}}divergent_inside_uniform:
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; SI: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK1:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: [[ENDIF_LABEL]]:
; SI: s_endpgm
define void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; SI-LABEL: {{^}}divergent_if_uniform_if:
; SI: v_cmp_eq_i32_e32 vcc, 0, v0
; SI: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[MASK]]
; SI: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; SI: buffer_store_dword [[ONE]]
; SI: s_or_b64 exec, exec, [[MASK]]
; SI: s_cmp_lg_i32 s{{[0-9]+}}, 0
; SI: s_cbranch_scc1 [[EXIT:[A-Z0-9_]+]]
; SI: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; SI: buffer_store_dword [[TWO]]
; SI: [[EXIT]]:
; SI: s_endpgm
define void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are
; uniform. MachineCSE replaces the 2nd condition with the inverse of
; the first, leaving an scc use in a different block than where it was
; defined.

; SI-LABEL: {{^}}cse_uniform_condition_different_blocks:
; SI: s_load_dword [[COND:s[0-9]+]]
; SI: s_cmp_lt_i32 [[COND]], 1
; SI: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3

; SI: BB#1:
; SI-NOT: cmp
; SI: buffer_load_dword
; SI: buffer_store_dword
; SI: s_cbranch_scc1 BB[[FNNUM]]_3

; SI: BB[[FNNUM]]_3:
; SI: s_endpgm
define void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2: ; preds = %bb
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7: ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9: ; preds = %bb7, %bb2, %bb
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { readnone }