Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
      2 
      3 ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
      4 ; CHECK-NEXT: ; BB#0:
      5 ; CHECK-NEXT: s_endpgm
      6 define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
          ; Kill on the immediate +0.0. The expectations above require s_endpgm
          ; directly after the BB#0 marker, i.e. no instruction for this kill.
      7   call void @llvm.AMDGPU.kill(float 0.0)
      8   ret void
      9 }
     10 
     11 ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
     12 ; CHECK-NEXT: ; BB#0:
     13 ; CHECK-NEXT: s_mov_b64 exec, 0
     14 ; CHECK-NEXT: ; BB#1:
     15 ; CHECK-NEXT: s_endpgm
     16 define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
          ; Kill on the immediate -0.0. The expectations above require
          ; `s_mov_b64 exec, 0` in BB#0, then a new block BB#1 holding s_endpgm.
     17   call void @llvm.AMDGPU.kill(float -0.0)
     18   ret void
     19 }
     20 
     21 ; FIXME: Ideally only one `s_mov_b64 exec, 0` would be emitted for the two kills.
     22 ; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
     23 ; CHECK-NEXT: ; BB#0:
     24 ; CHECK-NEXT: s_mov_b64 exec, 0
     25 ; CHECK-NEXT: ; BB#1:
     26 ; CHECK-NEXT: s_mov_b64 exec, 0
     27 ; CHECK-NEXT: ; BB#2:
     28 ; CHECK-NEXT: s_endpgm
     29 define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
          ; Two back-to-back kills on negative immediates (-0.0, then -1.0).
          ; The expectations above require one `s_mov_b64 exec, 0` per kill, each
          ; followed by a new block marker (BB#1, BB#2) before the final s_endpgm.
     30   call void @llvm.AMDGPU.kill(float -0.0)
     31   call void @llvm.AMDGPU.kill(float -1.0)
     32   ret void
     33 }
     34 
     35 ; CHECK-LABEL: {{^}}test_kill_depth_var:
     36 ; CHECK-NEXT: ; BB#0:
     37 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
     38 ; CHECK-NEXT: ; BB#1:
     39 ; CHECK-NEXT: s_endpgm
     40 define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
          ; Kill on a non-constant operand (the argument %x). The expectations
          ; above require a single `v_cmpx_le_f32_e32 vcc, 0, v0` in BB#0,
          ; followed by BB#1 holding s_endpgm.
     41   call void @llvm.AMDGPU.kill(float %x)
     42   ret void
     43 }
     44 
     45 ; FIXME: Ideally only one v_cmpx would be emitted for two kills of the same value.
     46 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
     47 ; CHECK-NEXT: ; BB#0:
     48 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
     49 ; CHECK-NEXT: ; BB#1:
     50 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
     51 ; CHECK-NEXT: ; BB#2:
     52 ; CHECK-NEXT: s_endpgm
     53 define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
          ; Two kills on the same argument %x. The expectations above require
          ; the identical v_cmpx on v0 twice, each ending its own block.
     54   call void @llvm.AMDGPU.kill(float %x)
     55   call void @llvm.AMDGPU.kill(float %x)
     56   ret void
     57 }
     58 
     59 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
     60 ; CHECK-NEXT: ; BB#0:
     61 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
     62 ; CHECK-NEXT: ; BB#1:
     63 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
     64 ; CHECK-NEXT: ; BB#2:
     65 ; CHECK-NEXT: s_endpgm
     66 define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
          ; Two kills on different arguments. The expectations above require a
          ; v_cmpx on v0 (for %x) and then one on v1 (for %y), each ending a block.
     67   call void @llvm.AMDGPU.kill(float %x)
     68   call void @llvm.AMDGPU.kill(float %y)
     69   ret void
     70 }
     71 
     72 ; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
     73 ; CHECK-NEXT: ; BB#0:
     74 ; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
     75 ; CHECK-NEXT: ; BB#1:
     76 ; CHECK: v_mov_b32_e64 v7, -1
     77 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
     78 ; CHECK-NEXT: ; BB#2:
     79 ; CHECK-NEXT: s_endpgm
     80 define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
          ; Two kills separated by another instruction: the first is on %x, the
          ; second on a value produced by inline asm between them.
     81   call void @llvm.AMDGPU.kill(float %x)
          ; The "={VGPR7}" output constraint ties %y to v7, which is why the
          ; expectations above look for the second v_cmpx on v7.
     82   %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
     83   call void @llvm.AMDGPU.kill(float %y)
     84   ret void
     85 }
     86 
     87 ; FIXME: Why does the skip depend on the length of the inline asm in the same block?
     88 
     89 ; CHECK-LABEL: {{^}}test_kill_control_flow:
     90 ; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
     91 ; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
     92 
     93 ; CHECK-NEXT: ; BB#1:
     94 ; CHECK: v_mov_b32_e64 v7, -1
     95 ; CHECK: v_nop_e64
     96 ; CHECK: v_nop_e64
     97 ; CHECK: v_nop_e64
     98 ; CHECK: v_nop_e64
     99 ; CHECK: v_nop_e64
    100 ; CHECK: v_nop_e64
    101 ; CHECK: v_nop_e64
    102 ; CHECK: v_nop_e64
    103 ; CHECK: v_nop_e64
    104 ; CHECK: v_nop_e64
    105 
    106 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
    107 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
    108 ; CHECK-NEXT: ; BB#3:
    109 ; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
    110 ; CHECK-NEXT: s_endpgm
    111 
    112 ; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
    113 ; CHECK-NEXT: s_endpgm
    114 define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
          ; Kill inside a conditionally executed block. %bb is entered only when
          ; %arg == 0; the expectations above look for an s_cmp_lg_i32 /
          ; s_cbranch_scc1 pair for that branch.
    115 entry:
    116   %cmp = icmp eq i32 %arg, 0
    117   br i1 %cmp, label %bb, label %exit
    118 
    119 bb:
          ; Inline asm writes v7 (bound to %var by "={VGPR7}") and pads the block
          ; with ten v_nop_e64, all of which the expectations above list.
    120   %var = call float asm sideeffect "
    121     v_mov_b32_e64 v7, -1
    122     v_nop_e64
    123     v_nop_e64
    124     v_nop_e64
    125     v_nop_e64
    126     v_nop_e64
    127     v_nop_e64
    128     v_nop_e64
    129     v_nop_e64
    130     v_nop_e64
    131     v_nop_e64", "={VGPR7}"()
          ; Expected after the v_cmpx for this kill: s_cbranch_execnz to the
          ; split block, with a fall-through block (BB#3) that does `exp` then
          ; s_endpgm.
    132   call void @llvm.AMDGPU.kill(float %var)
    133   br label %exit
    134 
    135 exit:
    136   ret void
    137 }
    138 
    139 ; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
    140 ; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
    141 ; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
    142 
    143 ; CHECK-NEXT: ; BB#1: ; %bb
    144 ; CHECK: v_mov_b32_e64 v7, -1
    145 ; CHECK: v_nop_e64
    146 ; CHECK: v_nop_e64
    147 ; CHECK: v_nop_e64
    148 ; CHECK: v_nop_e64
    149 ; CHECK: v_nop_e64
    150 ; CHECK: v_nop_e64
    151 ; CHECK: v_nop_e64
    152 ; CHECK: v_nop_e64
    153 ; CHECK: ;;#ASMEND
    154 ; CHECK: v_mov_b32_e64 v8, -1
    155 ; CHECK: ;;#ASMEND
    156 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
    157 ; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
    158 
    159 ; CHECK-NEXT: ; BB#4:
    160 ; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
    161 ; CHECK-NEXT: s_endpgm
    162 
    163 ; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
    164 ; CHECK: buffer_store_dword v8
    165 ; CHECK: v_mov_b32_e64 v9, -2
    166 
    167 ; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
    168 ; CHECK: buffer_store_dword v9
    169 ; CHECK-NEXT: s_endpgm
    170 define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
          ; Like a kill in a conditional block, but with more code after the kill
          ; in the same block: one value defined before the kill and used after
          ; it, and one value defined after the kill that flows into %exit.
    171 entry:
    172   %cmp = icmp eq i32 %arg, 0
    173   br i1 %cmp, label %bb, label %exit
    174 
    175 bb:
          ; NOTE(review): this asm contains eleven v_nop_e64 while the
          ; expectations above list eight; the directives used there are not the
          ; adjacent-line form, so the extra nops do not break the match.
    176   %var = call float asm sideeffect "
    177     v_mov_b32_e64 v7, -1
    178     v_nop_e64
    179     v_nop_e64
    180     v_nop_e64
    181     v_nop_e64
    182     v_nop_e64
    183     v_nop_e64
    184     v_nop_e64
    185     v_nop_e64
    186     v_nop_e64
    187     v_nop_e64
    188     v_nop_e64", "={VGPR7}"()
          ; %live.across (v8) is defined before the kill and stored after it.
    189   %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
    190   call void @llvm.AMDGPU.kill(float %var)
          ; The expectations above place this store of v8 and the v9 write below
          ; after the split-block label that the s_cbranch_execnz targets.
    191   store volatile float %live.across, float addrspace(1)* undef
    192   %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
    193   br label %exit
    194 
    195 exit:
          ; %phi is 0.0 when coming straight from %entry, %live.out (v9) from %bb.
    196   %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
    197   store float %phi, float addrspace(1)* undef
    198   ret void
    199 }
    200 
    201 ; CHECK-LABEL: {{^}}test_kill_divergent_loop:
    202 ; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
    203 ; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
    204 ; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
    205 ; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
    206 ; CHECK-NEXT: ; mask branch [[EXIT]]
    207 
    208 ; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:
    209 
    210 ; CHECK: v_mov_b32_e64 v7, -1
    211 ; CHECK: v_nop_e64
    212 ; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
    213 
    214 ; CHECK-NEXT: ; BB#3:
    215 ; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
    216 ; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]]
    217 ; CHECK-NEXT: s_and_b64 vcc, exec, vcc
    218 ; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]
    219 
    220 ; CHECK-NEXT: {{^}}[[EXIT]]:
    221 ; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
    222 ; CHECK: buffer_store_dword
    223 ; CHECK: s_endpgm
    224 define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
          ; Kill inside a loop. Unlike the two control-flow tests above, %arg is
          ; not marked inreg; the expectations above compare it in v0 and guard
          ; the loop with s_and_saveexec_b64 / s_xor_b64 / s_cbranch_execz.
    225 entry:
    226   %cmp = icmp eq i32 %arg, 0
    227   br i1 %cmp, label %bb, label %exit
    228 
    229 bb:
          ; Loop body: inline asm writes v7 (bound to %var by "={VGPR7}") followed
          ; by ten v_nop_e64; the expectations above only look for one of them.
    230   %var = call float asm sideeffect "
    231     v_mov_b32_e64 v7, -1
    232     v_nop_e64
    233     v_nop_e64
    234     v_nop_e64
    235     v_nop_e64
    236     v_nop_e64
    237     v_nop_e64
    238     v_nop_e64
    239     v_nop_e64
    240     v_nop_e64
    241     v_nop_e64", "={VGPR7}"()
    242   call void @llvm.AMDGPU.kill(float %var)
          ; The loop repeats while the volatile load returns 0; the expectations
          ; above match this as buffer_load_dword, v_cmp_eq_i32 and an
          ; s_cbranch_vccnz back to the loop label.
    243   %vgpr = load volatile i32, i32 addrspace(1)* undef
    244   %loop.cond = icmp eq i32 %vgpr, 0
    245   br i1 %loop.cond, label %bb, label %exit
    246 
    247 exit:
          ; Expected here: exec restored with s_or_b64 from the saved mask, then
          ; the store and s_endpgm.
    248   store volatile i32 8, i32 addrspace(1)* undef
    249   ret void
    250 }
    251 
    252 
    253 declare void @llvm.AMDGPU.kill(float) #0 ; intrinsic under test: one float operand, no result
    254 
    255 attributes #0 = { nounwind } ; attribute group used by the functions and the declaration above
    256