; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s

; Test lowering of llvm.AMDGPU.kill, which kills each lane whose operand is
; negative: a negative immediate becomes a direct clear of exec, while a
; variable operand becomes a v_cmpx comparison against zero.

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
  call void @llvm.AMDGPU.kill(float 0.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  ret void
}

; FIXME: Ideally only one s_mov_b64 would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
  call void @llvm.AMDGPU.kill(float -0.0)
  call void @llvm.AMDGPU.kill(float -1.0)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; FIXME: Ideally only one v_cmpx would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %x)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v1
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; CHECK-LABEL: {{^}}test_kill_depth_var_x2_instructions:
; CHECK-NEXT: ; BB#0:
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: ; BB#2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
  call void @llvm.AMDGPU.kill(float %x)
  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %y)
  ret void
}

; FIXME: Why does the skip depend on the asm length in the same block?
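; A plausible explanation (an assumption, not verified here): the skip branch
; over the killed code is only inserted once the block being skipped exceeds
; the -amdgpu-skip-threshold instruction count, which would explain why the
; amount of v_nop_e64 padding in the inline asm below matters.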

; CHECK-LABEL: {{^}}test_kill_control_flow:
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1:
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64

; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; BB#3:
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  br label %exit

exit:
  ret void
}

; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#1: ; %bb
; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: v_nop_e64
; CHECK: ;;#ASMEND
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]

; CHECK-NEXT: ; BB#4:
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
; CHECK-NEXT: s_endpgm

; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2

; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
; CHECK: buffer_store_dword v9
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
  call void @llvm.AMDGPU.kill(float %var)
  store volatile float %live.across, float addrspace(1)* undef
  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
  br label %exit

exit:
  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
  store float %phi, float addrspace(1)* undef
  ret void
}

; CHECK-LABEL: {{^}}test_kill_divergent_loop:
; CHECK: v_cmp_eq_i32_e32 vcc, 0, v0
; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc
; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]]
; CHECK-NEXT: s_cbranch_execz [[EXIT:BB[0-9]+_[0-9]+]]
; CHECK-NEXT: ; mask branch [[EXIT]]

; CHECK: [[LOOP_BB:BB[0-9]+_[0-9]+]]:

; CHECK: v_mov_b32_e64 v7, -1
; CHECK: v_nop_e64
; CHECK: v_cmpx_le_f32_e32 vcc, 0, v7

; CHECK-NEXT: ; BB#3:
; CHECK: buffer_load_dword [[LOAD:v[0-9]+]]
; CHECK: v_cmp_eq_i32_e32 vcc, 0, [[LOAD]]
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
; CHECK-NEXT: s_cbranch_vccnz [[LOOP_BB]]

; CHECK-NEXT: {{^}}[[EXIT]]:
; CHECK: s_or_b64 exec, exec, [[SAVEEXEC]]
; CHECK: buffer_store_dword
; CHECK: s_endpgm
define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
entry:
  %cmp = icmp eq i32 %arg, 0
  br i1 %cmp, label %bb, label %exit

bb:
  %var = call float asm sideeffect "
    v_mov_b32_e64 v7, -1
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "={VGPR7}"()
  call void @llvm.AMDGPU.kill(float %var)
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  %loop.cond = icmp eq i32 %vgpr, 0
  br i1 %loop.cond, label %bb, label %exit

exit:
  store volatile i32 8, i32 addrspace(1)* undef
  ret void
}

declare void @llvm.AMDGPU.kill(float) #0

attributes #0 = { nounwind }