; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s

; GCN-LABEL: {{^}}uniform_if_scc:
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_vcc:
; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = fcmp oeq float %cond, 0.0
  br i1 %cmp0, label %else, label %if

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
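; A rough sketch of the expected lowering (hypothetical registers and label;
; the real output is pinned down only by the CHECK lines below):
;   v_add_f32_e32 v0, 0x41200000, v0  ; fadd of 10.0 lands in a VGPR
;   v_cmp_ne_u32_e32 vcc, 5, v0       ; compare is moved to the VALU
;   s_and_b64 vcc, exec, vcc          ; condition masked with exec
;   s_cbranch_vccnz .LBB_endif        ; branch on VCC rather than SCC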
; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp eq i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
; Using a floating-point value in an integer compare will cause the compare to
; be selected for the SALU and then later moved to the VALU.
; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
; GCN: s_and_b64 vcc, exec, [[COND]]
; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: buffer_store_dword
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
entry:
  %a.0 = fadd float %a, 10.0
  %cond = bitcast float %a.0 to i32
  %cmp = icmp ugt i32 %cond, 5
  br i1 %cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}


; GCN-LABEL: {{^}}uniform_if_else_ret:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
; GCN: s_endpgm

; GCN: {{^}}[[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

; GCN-LABEL: {{^}}uniform_if_else:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]

; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]

; GCN: [[IF_LABEL]]:
; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1

; GCN-NEXT: [[ENDIF_LABEL]]:
; GCN: buffer_store_dword [[IMM_REG]]

; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
; GCN: buffer_store_dword [[THREE]]
; GCN: s_endpgm
define amdgpu_kernel void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
entry:
  %cmp = icmp eq i32 %a, 0
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  store i32 1, i32 addrspace(1)* %out0
  br label %if.end

if.else:                                          ; preds = %entry
  store i32 2, i32 addrspace(1)* %out0
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  store i32 3, i32 addrspace(1)* %out1
  ret void
}

; GCN-LABEL: {{^}}icmp_2_users:
; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
; GCN: buffer_store_dword
; GCN: [[LABEL]]:
; GCN: s_endpgm
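; The compare below has two users: the branch and the sext of the same i1.
; Since the condition and both users are uniform, the compare should still be
; selected as a scalar compare; the checks above expect a single s_cmp_lt_i32
; (the inverted form of sgt 0) feeding s_cbranch_scc1.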
define amdgpu_kernel void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
main_body:
  %0 = icmp sgt i32 %cond, 0
  %1 = sext i1 %0 to i32
  br i1 %0, label %IF, label %ENDIF

IF:
  store i32 %1, i32 addrspace(1)* %out
  br label %ENDIF

ENDIF:                                            ; preds = %IF, %main_body
  ret void
}

; GCN-LABEL: {{^}}icmp_users_different_blocks:
; GCN: s_load_dwordx2 s{{\[}}[[COND0:[0-9]+]]:[[COND1:[0-9]+]]{{\]}}
; GCN: s_cmp_lt_i32 s[[COND0]], 1
; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
; GCN: v_cmp_gt_i32_e64 {{[^,]*}}, s[[COND1]], 0{{$}}
; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
; GCN: {{^}}[[EXIT]]:
; GCN: s_endpgm
; GCN: {{^}}[[BODY]]:
; GCN: buffer_store
; GCN: s_endpgm
define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %cmp0 = icmp sgt i32 %cond0, 0
  %cmp1 = icmp sgt i32 %cond1, 0
  br i1 %cmp0, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp2 = sext i1 %cmp1 to i32
  %tmp3 = add i32 %tmp2, %tmp
  br i1 %cmp1, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}

; SI-LABEL: {{^}}uniform_loop:
; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
; SI: s_add_i32 [[I:s[0-9]+]], s{{[0-9]+}}, -1
; SI: s_cmp_lg_u32 [[I]], 0
; SI: s_cbranch_scc1 [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i.i, %loop]
  %i.i = add i32 %i, 1
  %cmp = icmp eq i32 %a, %i.i
  br i1 %cmp, label %done, label %loop

done:
  ret void
}

; Test uniform branches nested inside divergent control flow, and vice versa.
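; On GCN, a branch on a wavefront-uniform condition can be lowered to a scalar
; compare plus s_cbranch_scc*, while a divergent (per-lane) condition needs a
; VALU compare and exec-mask manipulation, roughly (hypothetical registers;
; the checks below give the exact patterns):
;   v_cmp_gt_u32_e32 vcc, 16, v0      ; per-lane condition in VCC
;   s_and_saveexec_b64 s[0:1], vcc    ; disable lanes where it failed
;   ...                               ; divergent "then" block
;   s_or_b64 exec, exec, s[0:1]       ; restore the exec mask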

; GCN-LABEL: {{^}}uniform_inside_divergent:
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
; GCN: s_endpgm
; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
define amdgpu_kernel void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}divergent_inside_uniform:
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: ; mask branch [[ENDIF_LABEL]]
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: [[ENDIF_LABEL]]:
; GCN: s_endpgm
define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if, label %endif

if:
  store i32 0, i32 addrspace(1)* %out
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp ult i32 %tid, 16
  br i1 %d_cmp, label %if_uniform, label %endif

if_uniform:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  ret void
}

; GCN-LABEL: {{^}}divergent_if_uniform_if:
; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
; GCN: buffer_store_dword [[ONE]]
; GCN: s_or_b64 exec, exec, [[MASK]]
; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
; GCN: s_endpgm
; GCN: [[IF_UNIFORM]]:
; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; GCN: buffer_store_dword [[TWO]]
define amdgpu_kernel void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %d_cmp = icmp eq i32 %tid, 0
  br i1 %d_cmp, label %if, label %endif

if:
  store i32 1, i32 addrspace(1)* %out
  br label %endif

endif:
  %u_cmp = icmp eq i32 %cond, 0
  br i1 %u_cmp, label %if_uniform, label %exit

if_uniform:
  store i32 2, i32 addrspace(1)* %out
  br label %exit

exit:
  ret void
}

; The conditions of the branches in the two blocks are uniform. MachineCSE
; replaces the second condition with the inverse of the first, leaving an scc
; use in a different block than the one where it was defined.
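; Roughly, in terms of the IR below: %tmp1 = icmp sgt i32 %cond, 0 in the
; entry block and %tmp9 = icmp sle i32 %cond, 0 in bb2 are inverses, so a
; single s_cmp_lt_i32 defines SCC once and both s_cbranch_scc1 branches
; consume it (note the GCN-NOT: cmp check in the second block).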

; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
; GCN: s_load_dword [[COND:s[0-9]+]]
; GCN: s_cmp_lt_i32 [[COND]], 1
; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3

; GCN: %bb.1:
; GCN-NOT: cmp
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: s_cbranch_scc1 BB[[FNNUM]]_3

; GCN: BB[[FNNUM]]_3:
; GCN: s_endpgm
define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
  %tmp1 = icmp sgt i32 %cond, 0
  br i1 %tmp1, label %bb2, label %bb9

bb2:                                              ; preds = %bb
  %tmp3 = load volatile i32, i32 addrspace(1)* undef
  store volatile i32 0, i32 addrspace(1)* undef
  %tmp9 = icmp sle i32 %cond, 0
  br i1 %tmp9, label %bb9, label %bb7

bb7:                                              ; preds = %bb2
  store i32 %tmp3, i32 addrspace(1)* %out
  br label %bb9

bb9:                                              ; preds = %bb7, %bb2, %bb
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; SI-DAG: v_cmp_eq_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0

; SI-DAG: v_cmp_ne_u64_e64
; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
; GCN-DAG: v_cmp_gt_i64_e64
; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]

; Fall-through to the else
; GCN: s_mov_b32 [[S_VAL]], 1

; GCN: [[IF_LABEL]]:
; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
; GCN: buffer_store_dword [[V_VAL]]
define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
entry:
  %cmp0 = icmp sgt i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_i64_eq:
; GCN: v_cmp_eq_u64_e32
define amdgpu_kernel void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
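  ; The condition comes from a per-lane LDS load, so the i64 compare is
  ; divergent and cannot stay on the SALU; it is expected to be legalized to
  ; the VALU v_cmp_eq_u64 checked above.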
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp eq i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_i64_ne:
; GCN: v_cmp_ne_u64_e32
define amdgpu_kernel void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
  %cond = load volatile i64, i64 addrspace(3)* undef
  %cmp0 = icmp ne i64 %cond, 0
  br i1 %cmp0, label %if, label %else

if:
  br label %done

else:
  br label %done

done:
  %value = phi i32 [0, %if], [1, %else]
  store i32 %value, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
; GCN: v_add_{{[iu]}}32_e32
; GCN: ds_write_b32
define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
bb0:
  br label %bb1

bb1:                                              ; preds = %bb3, %bb0
  %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
  %tmp1 = add nsw i32 %tmp0, -1
  %tmp2 = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tmp1
  br i1 undef, label %bb2, label %bb3

bb2:                                              ; preds = %bb1
  store volatile i32 1, i32 addrspace(3)* %tmp2, align 4
  br label %bb3

bb3:                                              ; preds = %bb2, %bb1
  %tmp4 = add nsw i32 %tmp0, 2
  br label %bb1
}

declare i32 @llvm.amdgcn.workitem.id.x() #0

attributes #0 = { nounwind readnone }