Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-early-ifcvt=0 -machine-sink-split-probability-threshold=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; GCN-LABEL: {{^}}uniform_if_scc:
      5 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
      6 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
      7 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
      8 
      9 ; Fall-through to the else
     10 ; GCN: s_mov_b32 [[S_VAL]], 1
     11 
     12 ; GCN: [[IF_LABEL]]:
     13 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
     14 ; GCN: buffer_store_dword [[V_VAL]]
; Uniform (SGPR argument) i32 ==0 compare. Both arms are empty and only
; select the phi value in %done, so codegen uses s_cmp/s_cbranch_scc1
; with scalar moves for the phi (see checks above).
     15 define amdgpu_kernel void @uniform_if_scc(i32 %cond, i32 addrspace(1)* %out) {
     16 entry:
     17   %cmp0 = icmp eq i32 %cond, 0
     18   br i1 %cmp0, label %if, label %else
     19 
     20 if:
     21   br label %done
     22 
     23 else:
     24   br label %done
     25 
     26 done:
     27   %value = phi i32 [0, %if], [1, %else]
     28   store i32 %value, i32 addrspace(1)* %out
     29   ret void
     30 }
     31 
     32 ; GCN-LABEL: {{^}}uniform_if_vcc:
     33 ; GCN-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
     34 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
     35 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
     36 
     37 ; Fall-through to the else
     38 ; GCN: s_mov_b32 [[S_VAL]], 1
     39 
     40 ; GCN: [[IF_LABEL]]:
     41 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
     42 ; GCN: buffer_store_dword [[V_VAL]]
; Same CFG shape as @uniform_if_scc, but the condition is a float compare:
; the checks above expect a v_cmp producing VCC and an s_cbranch_vccnz,
; even though the operand is a uniform kernel argument.
     43 define amdgpu_kernel void @uniform_if_vcc(float %cond, i32 addrspace(1)* %out) {
     44 entry:
     45   %cmp0 = fcmp oeq float %cond, 0.0
     46   br i1 %cmp0, label %if, label %else
     47 
     48 if:
     49   br label %done
     50 
     51 else:
     52   br label %done
     53 
     54 done:
     55   %value = phi i32 [0, %if], [1, %else]
     56   store i32 %value, i32 addrspace(1)* %out
     57   ret void
     58 }
     59 
     60 ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_scc:
     61 ; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
     62 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
     63 ; GCN: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
     64 
     65 ; Fall-through to the else
     66 ; GCN: s_mov_b32 [[S_VAL]], 1
     67 
     68 ; GCN: [[IF_LABEL]]:
     69 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
     70 ; GCN: buffer_store_dword [[V_VAL]]
; As @uniform_if_scc but with the br successors swapped (true -> %else).
; The checks above expect the compare to be inverted (s_cmp_lg_u32)
; rather than emitting an extra branch.
     71 define amdgpu_kernel void @uniform_if_swap_br_targets_scc(i32 %cond, i32 addrspace(1)* %out) {
     72 entry:
     73   %cmp0 = icmp eq i32 %cond, 0
     74   br i1 %cmp0, label %else, label %if
     75 
     76 if:
     77   br label %done
     78 
     79 else:
     80   br label %done
     81 
     82 done:
     83   %value = phi i32 [0, %if], [1, %else]
     84   store i32 %value, i32 addrspace(1)* %out
     85   ret void
     86 }
     87 
     88 ; GCN-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
     89 ; GCN-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
     90 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
     91 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
     92 
     93 ; Fall-through to the else
     94 ; GCN: s_mov_b32 [[S_VAL]], 1
     95 
     96 ; GCN: [[IF_LABEL]]:
     97 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
     98 ; GCN: buffer_store_dword [[V_VAL]]
; As @uniform_if_vcc but with the br successors swapped (true -> %else).
; The checks above expect the FP compare to be inverted (v_cmp_neq_f32)
; instead of adding a branch.
     99 define amdgpu_kernel void @uniform_if_swap_br_targets_vcc(float %cond, i32 addrspace(1)* %out) {
    100 entry:
    101   %cmp0 = fcmp oeq float %cond, 0.0
    102   br i1 %cmp0, label %else, label %if
    103 
    104 if:
    105   br label %done
    106 
    107 else:
    108   br label %done
    109 
    110 done:
    111   %value = phi i32 [0, %if], [1, %else]
    112   store i32 %value, i32 addrspace(1)* %out
    113   ret void
    114 }
    115 
    116 ; GCN-LABEL: {{^}}uniform_if_move_valu:
    117 ; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
    118 ; Using a floating-point value in an integer compare will cause the compare to
    119 ; be selected for the SALU and then later moved to the VALU.
    120 ; GCN: v_cmp_ne_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 5, [[CMP]]
    121 ; GCN: s_and_b64 vcc, exec, [[COND]]
    122 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
    123 ; GCN: buffer_store_dword
    124 ; GCN: [[ENDIF_LABEL]]:
    125 ; GCN: s_endpgm
; The i32 compare operand is the bitcast of a VALU fadd result, so the
; compare is initially selected for the SALU and must later be moved to
; the VALU (v_cmp_ne_u32 + s_and_b64 with exec, per the checks above).
    126 define amdgpu_kernel void @uniform_if_move_valu(i32 addrspace(1)* %out, float %a) {
    127 entry:
    128   %a.0 = fadd float %a, 10.0
    129   %cond = bitcast float %a.0 to i32
    130   %cmp = icmp eq i32 %cond, 5
    131   br i1 %cmp, label %if, label %endif
    132 
    133 if:
    134   store i32 0, i32 addrspace(1)* %out
    135   br label %endif
    136 
    137 endif:
    138   ret void
    139 }
    140 
    141 ; GCN-LABEL: {{^}}uniform_if_move_valu_commute:
    142 ; GCN: v_add_f32_e32 [[CMP:v[0-9]+]]
    143 ; Using a floating-point value in an integer compare will cause the compare to
    144 ; be selected for the SALU and then later moved to the VALU.
    145 ; GCN: v_cmp_gt_u32_e32 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 6, [[CMP]]
    146 ; GCN: s_and_b64 vcc, exec, [[COND]]
    147 ; GCN: s_cbranch_vccnz [[ENDIF_LABEL:[0-9_A-Za-z]+]]
    148 ; GCN: buffer_store_dword
    149 ; GCN: [[ENDIF_LABEL]]:
    150 ; GCN: s_endpgm
; Variant of @uniform_if_move_valu with an unsigned '>' compare; the checks
; above expect the commuted/adjusted form v_cmp_gt_u32 with immediate 6
; (i.e. 6 > x as the inverse of x > 5) after the move to the VALU.
    151 define amdgpu_kernel void @uniform_if_move_valu_commute(i32 addrspace(1)* %out, float %a) {
    152 entry:
    153   %a.0 = fadd float %a, 10.0
    154   %cond = bitcast float %a.0 to i32
    155   %cmp = icmp ugt i32 %cond, 5
    156   br i1 %cmp, label %if, label %endif
    157 
    158 if:
    159   store i32 0, i32 addrspace(1)* %out
    160   br label %endif
    161 
    162 endif:
    163   ret void
    164 }
    165 
    166 
    167 ; GCN-LABEL: {{^}}uniform_if_else_ret:
    168 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
    169 ; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
    170 
    171 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
    172 ; GCN: buffer_store_dword [[TWO]]
    173 ; GCN: s_endpgm
    174 
    175 ; GCN: {{^}}[[IF_LABEL]]:
    176 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
    177 ; GCN: buffer_store_dword [[ONE]]
    178 ; GCN: s_endpgm
; Uniform if/else where both arms store and then return; the checks above
; expect each arm to end in its own s_endpgm rather than rejoining.
    179 define amdgpu_kernel void @uniform_if_else_ret(i32 addrspace(1)* nocapture %out, i32 %a) {
    180 entry:
    181   %cmp = icmp eq i32 %a, 0
    182   br i1 %cmp, label %if.then, label %if.else
    183 
    184 if.then:                                          ; preds = %entry
    185   store i32 1, i32 addrspace(1)* %out
    186   br label %if.end
    187 
    188 if.else:                                          ; preds = %entry
    189   store i32 2, i32 addrspace(1)* %out
    190   br label %if.end
    191 
    192 if.end:                                           ; preds = %if.else, %if.then
    193   ret void
    194 }
    195 
    196 ; GCN-LABEL: {{^}}uniform_if_else:
    197 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
    198 ; GCN: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
    199 
    200 ; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
    201 ; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
    202 
    203 ; GCN: [[IF_LABEL]]:
    204 ; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
    205 
    206 ; GCN-NEXT: [[ENDIF_LABEL]]:
    207 ; GCN: buffer_store_dword [[IMM_REG]]
    208 
    209 ; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
    210 ; GCN: buffer_store_dword [[THREE]]
    211 ; GCN: s_endpgm
; Uniform if/else with a shared continuation: both arms store to %out0,
; then %if.end stores 3 to %out1. The checks above expect the two arms to
; converge on a common buffer_store of the selected value.
    212 define amdgpu_kernel void @uniform_if_else(i32 addrspace(1)* nocapture %out0, i32 addrspace(1)* nocapture %out1, i32 %a) {
    213 entry:
    214   %cmp = icmp eq i32 %a, 0
    215   br i1 %cmp, label %if.then, label %if.else
    216 
    217 if.then:                                          ; preds = %entry
    218   store i32 1, i32 addrspace(1)* %out0
    219   br label %if.end
    220 
    221 if.else:                                          ; preds = %entry
    222   store i32 2, i32 addrspace(1)* %out0
    223   br label %if.end
    224 
    225 if.end:                                           ; preds = %if.else, %if.then
    226   store i32 3, i32 addrspace(1)* %out1
    227   ret void
    228 }
    229 
    230 ; GCN-LABEL: {{^}}icmp_2_users:
    231 ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 1
    232 ; GCN: s_cbranch_scc1 [[LABEL:[a-zA-Z0-9_]+]]
    233 ; GCN: buffer_store_dword
    234 ; GCN: [[LABEL]]:
    235 ; GCN: s_endpgm
; The compare %0 has two users (the sext and the branch), so it cannot be
; folded away into the branch alone; a single s_cmp/s_cbranch is still
; expected (see checks above).
    236 define amdgpu_kernel void @icmp_2_users(i32 addrspace(1)* %out, i32 %cond) {
    237 main_body:
    238   %0 = icmp sgt i32 %cond, 0
    239   %1 = sext i1 %0 to i32
    240   br i1 %0, label %IF, label %ENDIF
    241 
    242 IF:
    243   store i32 %1, i32 addrspace(1)* %out
    244   br label %ENDIF
    245 
    246 ENDIF:                                            ; preds = %IF, %main_body
    247   ret void
    248 }
    249 
    250 ; GCN-LABEL: {{^}}icmp_users_different_blocks:
    251 ; GCN: s_load_dwordx2 s{{\[}}[[COND0:[0-9]+]]:[[COND1:[0-9]+]]{{\]}}
    252 ; GCN: s_cmp_lt_i32 s[[COND0]], 1
    253 ; GCN: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
    254 ; GCN: v_cmp_gt_i32_e64 {{[^,]*}}, s[[COND1]], 0{{$}}
    255 ; GCN: s_cbranch_vccz [[BODY:[A-Za-z0-9_]+]]
    256 ; GCN: {{^}}[[EXIT]]:
    257 ; GCN: s_endpgm
    258 ; GCN: {{^}}[[BODY]]:
    259 ; GCN: buffer_store
    260 ; GCN: s_endpgm
; Two compares defined in %bb: %cmp0 controls the branch in %bb, while
; %cmp1 is used (sext + branch) in %bb2 — users in a different block than
; the def. NOTE: the stale ``preds`` comments below are corrected to match
; the actual CFG (%bb7's pred is %bb2; %bb9 is reached from %bb, %bb2, %bb7).
    261 define amdgpu_kernel void @icmp_users_different_blocks(i32 %cond0, i32 %cond1, i32 addrspace(1)* %out) {
    262 bb:
    263   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
    264   %cmp0 = icmp sgt i32 %cond0, 0
    265   %cmp1 = icmp sgt i32 %cond1, 0
    266   br i1 %cmp0, label %bb2, label %bb9
    267 
    268 bb2:                                              ; preds = %bb
    269   %tmp2 = sext i1 %cmp1 to i32
    270   %tmp3 = add i32 %tmp2, %tmp
    271   br i1 %cmp1, label %bb9, label %bb7
    272 
    273 bb7:                                              ; preds = %bb2
    274   store i32 %tmp3, i32 addrspace(1)* %out
    275   br label %bb9
    276 
    277 bb9:                                              ; preds = %bb7, %bb2, %bb
    278   ret void
    279 }
    280 
    281 ; SI-LABEL: {{^}}uniform_loop:
    282 ; SI: {{^}}[[LOOP_LABEL:[A-Z0-9_a-z]+]]:
    283 ; SI: s_add_i32 [[I:s[0-9]+]],  s{{[0-9]+}}, -1
    284 ; SI: s_cmp_lg_u32 [[I]], 0
    285 ; SI: s_cbranch_scc1 [[LOOP_LABEL]]
    286 ; SI: s_endpgm
; Uniform counted loop: %i.i counts up until it equals the uniform %a.
; The SI checks above expect it to be rewritten as a countdown
; (s_add_i32 -1 / s_cmp_lg_u32 0 / s_cbranch_scc1 back-edge).
    287 define amdgpu_kernel void @uniform_loop(i32 addrspace(1)* %out, i32 %a) {
    288 entry:
    289   br label %loop
    290 
    291 loop:
    292   %i = phi i32 [0, %entry], [%i.i, %loop]
    293   %i.i = add i32 %i, 1
    294   %cmp = icmp eq i32 %a, %i.i
    295   br i1 %cmp, label %done, label %loop
    296 
    297 done:
    298   ret void
    299 }
    300 
    301 ; Test uniform and divergent.
    302 
    303 ; GCN-LABEL: {{^}}uniform_inside_divergent:
    304 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
    305 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
    306 ; GCN: s_cmp_lg_u32 {{s[0-9]+}}, 0
    307 ; GCN: s_cbranch_scc0 [[IF_UNIFORM_LABEL:[A-Z0-9_a-z]+]]
    308 ; GCN: s_endpgm
    309 ; GCN: {{^}}[[IF_UNIFORM_LABEL]]:
    310 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
    311 ; GCN: buffer_store_dword [[ONE]]
; A uniform branch (%u_cmp on the SGPR %cond) nested inside a divergent
; one (%d_cmp on the workitem id): expects exec masking (s_and_saveexec)
; for the outer branch and a plain scc branch for the inner one.
    312 define amdgpu_kernel void @uniform_inside_divergent(i32 addrspace(1)* %out, i32 %cond) {
    313 entry:
    314   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
    315   %d_cmp = icmp ult i32 %tid, 16
    316   br i1 %d_cmp, label %if, label %endif
    317 
    318 if:
    319   store i32 0, i32 addrspace(1)* %out
    320   %u_cmp = icmp eq i32 %cond, 0
    321   br i1 %u_cmp, label %if_uniform, label %endif
    322 
    323 if_uniform:
    324   store i32 1, i32 addrspace(1)* %out
    325   br label %endif
    326 
    327 endif:
    328   ret void
    329 }
    330 
    331 ; GCN-LABEL: {{^}}divergent_inside_uniform:
    332 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
    333 ; GCN: s_cbranch_scc1 [[ENDIF_LABEL:[0-9_A-Za-z]+]]
    334 ; GCN: v_cmp_gt_u32_e32 vcc, 16, v{{[0-9]+}}
    335 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
    336 ; GCN: ; mask branch [[ENDIF_LABEL]]
    337 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
    338 ; GCN: buffer_store_dword [[ONE]]
    339 ; GCN: [[ENDIF_LABEL]]:
    340 ; GCN: s_endpgm
; Mirror of @uniform_inside_divergent: a divergent branch (%d_cmp on the
; workitem id) nested inside a uniform one (%u_cmp on the SGPR %cond).
; Expects scc branch first, then exec masking for the inner branch.
    341 define amdgpu_kernel void @divergent_inside_uniform(i32 addrspace(1)* %out, i32 %cond) {
    342 entry:
    343   %u_cmp = icmp eq i32 %cond, 0
    344   br i1 %u_cmp, label %if, label %endif
    345 
    346 if:
    347   store i32 0, i32 addrspace(1)* %out
    348   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
    349   %d_cmp = icmp ult i32 %tid, 16
    350   br i1 %d_cmp, label %if_uniform, label %endif
    351 
    352 if_uniform:
    353   store i32 1, i32 addrspace(1)* %out
    354   br label %endif
    355 
    356 endif:
    357   ret void
    358 }
    359 
    360 ; GCN-LABEL: {{^}}divergent_if_uniform_if:
    361 ; GCN: v_cmp_eq_u32_e32 vcc, 0, v0
    362 ; GCN: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
    363 ; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
    364 ; GCN: buffer_store_dword [[ONE]]
    365 ; GCN: s_or_b64 exec, exec, [[MASK]]
    366 ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
    367 ; GCN: s_cbranch_scc0 [[IF_UNIFORM:[A-Z0-9_]+]]
    368 ; GCN: s_endpgm
    369 ; GCN: [[IF_UNIFORM]]:
    370 ; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
    371 ; GCN: buffer_store_dword [[TWO]]
; A divergent if (workitem id) followed sequentially by a uniform if
; (SGPR %cond): the exec mask must be fully restored (s_or_b64 exec)
; before the scalar compare/branch for the second if (see checks above).
    372 define amdgpu_kernel void @divergent_if_uniform_if(i32 addrspace(1)* %out, i32 %cond) {
    373 entry:
    374   %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
    375   %d_cmp = icmp eq i32 %tid, 0
    376   br i1 %d_cmp, label %if, label %endif
    377 
    378 if:
    379   store i32 1, i32 addrspace(1)* %out
    380   br label %endif
    381 
    382 endif:
    383   %u_cmp = icmp eq i32 %cond, 0
    384   br i1 %u_cmp, label %if_uniform, label %exit
    385 
    386 if_uniform:
    387   store i32 2, i32 addrspace(1)* %out
    388   br label %exit
    389 
    390 exit:
    391   ret void
    392 }
    393 
    394 ; The condition of the branches in the two blocks are
    395 ; uniform. MachineCSE replaces the 2nd condition with the inverse of
    396 ; the first, leaving an scc use in a different block than it was
    397 ; defed.
    398 
    399 ; GCN-LABEL: {{^}}cse_uniform_condition_different_blocks:
    400 ; GCN: s_load_dword [[COND:s[0-9]+]]
    401 ; GCN: s_cmp_lt_i32 [[COND]], 1
    402 ; GCN: s_cbranch_scc1 BB[[FNNUM:[0-9]+]]_3
    403 
    404 ; GCN: %bb.1:
    405 ; GCN-NOT: cmp
    406 ; GCN: buffer_load_dword
    407 ; GCN: buffer_store_dword
    408 ; GCN: s_cbranch_scc1 BB[[FNNUM]]_3
    409 
    410 ; GCN: BB[[FNNUM]]_3:
    411 ; GCN: s_endpgm
; %tmp1 (sgt) and %tmp9 (sle) are inverses of the same uniform condition;
; MachineCSE folds the second into a reuse of the first's SCC, leaving an
; scc use in a different block than its def (see comment above the checks).
; NOTE: the stale ``preds`` comments below are corrected to match the
; actual CFG (%bb7's pred is %bb2; %bb9 is reached from %bb, %bb2, %bb7).
    412 define amdgpu_kernel void @cse_uniform_condition_different_blocks(i32 %cond, i32 addrspace(1)* %out) {
    413 bb:
    414   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0
    415   %tmp1 = icmp sgt i32 %cond, 0
    416   br i1 %tmp1, label %bb2, label %bb9
    417 
    418 bb2:                                              ; preds = %bb
    419   %tmp3 = load volatile i32, i32 addrspace(1)* undef
    420   store volatile i32 0, i32 addrspace(1)* undef
    421   %tmp9 = icmp sle i32 %cond, 0
    422   br i1 %tmp9, label %bb9, label %bb7
    423 
    424 bb7:                                              ; preds = %bb2
    425   store i32 %tmp3, i32 addrspace(1)* %out
    426   br label %bb9
    427 
    428 bb9:                                              ; preds = %bb7, %bb2, %bb
    429   ret void
    430 }
    431 
    432 ; GCN-LABEL: {{^}}uniform_if_scc_i64_eq:
    433 ; VI-DAG: s_cmp_eq_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
    434 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
    435 ; SI-DAG: v_cmp_eq_u64_e64
    436 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
    437 
    438 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
    439 
    440 ; Fall-through to the else
    441 ; GCN: s_mov_b32 [[S_VAL]], 1
    442 
    443 ; GCN: [[IF_LABEL]]:
    444 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
    445 ; GCN: buffer_store_dword [[V_VAL]]
; Uniform i64 ==0 compare. VI has a scalar s_cmp_eq_u64 and can branch on
; SCC; SI has no scalar 64-bit compare, so the checks above expect a
; v_cmp_eq_u64 plus an s_cbranch_vccnz there.
    446 define amdgpu_kernel void @uniform_if_scc_i64_eq(i64 %cond, i32 addrspace(1)* %out) {
    447 entry:
    448   %cmp0 = icmp eq i64 %cond, 0
    449   br i1 %cmp0, label %if, label %else
    450 
    451 if:
    452   br label %done
    453 
    454 else:
    455   br label %done
    456 
    457 done:
    458   %value = phi i32 [0, %if], [1, %else]
    459   store i32 %value, i32 addrspace(1)* %out
    460   ret void
    461 }
    462 
    463 ; GCN-LABEL: {{^}}uniform_if_scc_i64_ne:
    464 ; VI-DAG: s_cmp_lg_u64 s{{\[[0-9]+:[0-9]+\]}}, 0
    465 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
    466 
    467 ; SI-DAG: v_cmp_ne_u64_e64
    468 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
    469 
    470 ; VI: s_cbranch_scc1 [[IF_LABEL:[0-9_A-Za-z]+]]
    471 
    472 ; Fall-through to the else
    473 ; GCN: s_mov_b32 [[S_VAL]], 1
    474 
    475 ; GCN: [[IF_LABEL]]:
    476 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
    477 ; GCN: buffer_store_dword [[V_VAL]]
; Uniform i64 !=0 compare. As with the eq case: VI uses scalar
; s_cmp_lg_u64 + scc branch, SI falls back to v_cmp_ne_u64 + vcc branch
; (see checks above).
    478 define amdgpu_kernel void @uniform_if_scc_i64_ne(i64 %cond, i32 addrspace(1)* %out) {
    479 entry:
    480   %cmp0 = icmp ne i64 %cond, 0
    481   br i1 %cmp0, label %if, label %else
    482 
    483 if:
    484   br label %done
    485 
    486 else:
    487   br label %done
    488 
    489 done:
    490   %value = phi i32 [0, %if], [1, %else]
    491   store i32 %value, i32 addrspace(1)* %out
    492   ret void
    493 }
    494 
    495 ; GCN-LABEL: {{^}}uniform_if_scc_i64_sgt:
    496 ; GCN-DAG: s_mov_b32 [[S_VAL:s[0-9]+]], 0
    497 ; GCN-DAG: v_cmp_gt_i64_e64
    498 ; GCN: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
    499 
    500 ; Fall-through to the else
    501 ; GCN: s_mov_b32 [[S_VAL]], 1
    502 
    503 ; GCN: [[IF_LABEL]]:
    504 ; GCN: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[S_VAL]]
    505 ; GCN: buffer_store_dword [[V_VAL]]
; Uniform i64 signed '>' compare: the checks above expect v_cmp_gt_i64 on
; both targets (only eq/ne have scalar 64-bit compare forms above), with
; a vcc branch.
    506 define amdgpu_kernel void @uniform_if_scc_i64_sgt(i64 %cond, i32 addrspace(1)* %out) {
    507 entry:
    508   %cmp0 = icmp sgt i64 %cond, 0
    509   br i1 %cmp0, label %if, label %else
    510 
    511 if:
    512   br label %done
    513 
    514 else:
    515   br label %done
    516 
    517 done:
    518   %value = phi i32 [0, %if], [1, %else]
    519   store i32 %value, i32 addrspace(1)* %out
    520   ret void
    521 }
    522 
    523 ; GCN-LABEL: {{^}}move_to_valu_i64_eq:
    524 ; GCN: v_cmp_eq_u64_e32
; The i64 condition is loaded from LDS rather than taken as an SGPR
; argument, so the eq compare ends up on the VALU (v_cmp_eq_u64_e32 per
; the check above).
    525 define amdgpu_kernel void @move_to_valu_i64_eq(i32 addrspace(1)* %out) {
    526   %cond = load volatile i64, i64 addrspace(3)* undef
    527   %cmp0 = icmp eq i64 %cond, 0
    528   br i1 %cmp0, label %if, label %else
    529 
    530 if:
    531   br label %done
    532 
    533 else:
    534   br label %done
    535 
    536 done:
    537   %value = phi i32 [0, %if], [1, %else]
    538   store i32 %value, i32 addrspace(1)* %out
    539   ret void
    540 }
    541 
    542 ; GCN-LABEL: {{^}}move_to_valu_i64_ne:
    543 ; GCN: v_cmp_ne_u64_e32
; Same as @move_to_valu_i64_eq but with a ne compare: the LDS-loaded
; condition forces v_cmp_ne_u64_e32 (see check above).
    544 define amdgpu_kernel void @move_to_valu_i64_ne(i32 addrspace(1)* %out) {
    545   %cond = load volatile i64, i64 addrspace(3)* undef
    546   %cmp0 = icmp ne i64 %cond, 0
    547   br i1 %cmp0, label %if, label %else
    548 
    549 if:
    550   br label %done
    551 
    552 else:
    553   br label %done
    554 
    555 done:
    556   %value = phi i32 [0, %if], [1, %else]
    557   store i32 %value, i32 addrspace(1)* %out
    558   ret void
    559 }
    560 
    561 ; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi:
    562 ; GCN: v_add_{{[iu]}}32_e32
    563 ; GCN: ds_write_b32
; Non-kernel function with an infinite loop. The phi %tmp0 feeds the add
; producing the ds_write address, so the add must be selected on the VALU
; (v_add + ds_write_b32 per the checks above). The `br i1 undef` only
; exists to create the %bb2/%bb3 diamond.
    564 define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) {
    565 bb0:
    566   br label %bb1
    567 
    568 bb1:                                              ; preds = %bb3, %bb0
    569   %tmp0 = phi i32 [ 8, %bb0 ], [ %tmp4, %bb3 ]
    570   %tmp1 = add nsw i32 %tmp0, -1
    571   %tmp2 = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tmp1
    572   br i1 undef, label %bb2, label %bb3
    573 
    574 bb2:                                              ; preds = %bb1
    575   store volatile i32 1, i32 addrspace(3)* %tmp2, align 4
    576   br label %bb3
    577 
    578 bb3:                                              ; preds = %bb2, %bb1
    579   %tmp4 = add nsw i32 %tmp0, 2
    580   br label %bb1
    581 }
    582 
    583 declare i32 @llvm.amdgcn.workitem.id.x() #0
    584 
    585 attributes #0 = { nounwind readnone }
    586