; RUN: llc < %s -march=amdgcn -mcpu=verde -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -asm-verbose=0 -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

; Note for maintainers: the FileCheck invocations above enable only the SI and
; FUNC prefixes. A directive written with any other prefix, or with no prefix
; at all, is treated as a plain comment and is never verified.

; The loop exit condition (%1) is computed in the entry block, before the
; single-block loop is entered. The checks expect the mask update, the exec
; update and the back-edge branch to appear after the loop label.
; FUNC-LABEL: {{^}}break_inserted_outside_of_loop:

; SI: [[LOOP_LABEL:BB[0-9]+_[0-9]+]]:
; Lowered break instruction:
; SI: s_or_b64
; Lowered Loop instruction:
; SI: s_andn2_b64
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm
define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
main_body:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = and i32 %a, %tid
  %1 = trunc i32 %0 to i1
  br label %ENDIF

ENDLOOP:
  store i32 0, i32 addrspace(1)* %out
  ret void

ENDIF:
  br i1 %1, label %ENDLOOP, label %ENDIF
}


; The loop exit condition is a phi (%2) defined in the block that precedes the
; loop, merging a constant from one predecessor with a compare from the other.
; FUNC-LABEL: {{^}}phi_cond_outside_loop:
; FIXME: This could be folded into the s_or_b64 instruction
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0
; SI: [[LOOP_LABEL:[A-Z0-9]+]]
; SI: v_cmp_ne_u32_e32 vcc, 0, v{{[0-9]+}}

; SI_IF_BREAK instruction:
; SI: s_or_b64 [[BREAK:s\[[0-9]+:[0-9]+\]]], vcc, [[ZERO]]

; SI_LOOP instruction:
; SI: s_andn2_b64 exec, exec, [[BREAK]]
; SI: s_cbranch_execnz [[LOOP_LABEL]]
; SI: s_endpgm

define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
entry:
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %0 = icmp eq i32 %tid , 0
  br i1 %0, label %if, label %else

if:
  br label %endif

else:
  %1 = icmp eq i32 %b, 0
  br label %endif

endif:
  %2 = phi i1 [0, %if], [%1, %else]
  br label %loop

loop:
  br i1 %2, label %exit, label %loop

exit:
  ret void
}

; Every destination of the switch ends in unreachable, and the only block with
; a ret (sw.epilog) has no predecessors. The checks pin the current output,
; which contains no s_endpgm before the function-end label.
; FIXME: should emit s_endpgm
; FUNC-LABEL: {{^}}switch_unreachable:
; SI-NOT: s_endpgm
; SI: .Lfunc_end2
define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
centry:
  switch i32 %x, label %sw.default [
    i32 0, label %sw.bb
    i32 60, label %sw.bb
  ]

sw.bb:
  unreachable

sw.default:
  unreachable

sw.epilog:
  ret void
}

declare float @llvm.fabs.f32(float) nounwind readnone

; This broke the old AMDIL cfg structurizer
; The IR nests several loops, including a block that branches only to itself
; (self.loop), all steered by conditions computed from kernel arguments.
; FUNC-LABEL: {{^}}loop_land_info_assert:
; SI: v_cmp_lt_i32_e64 [[CMP4:s\[[0-9:]+\]]], s{{[0-9]+}}, 4{{$}}
; SI: s_and_b64 vcc, exec, [[CMP4]]
; SI-NEXT: s_cbranch_vccnz [[BR1:BB[0-9_]+]]
; SI-NEXT: s_branch [[BR2:BB[0-9_]+]]
; SI-NEXT: BB{{[0-9_]+}}:
; SI-NEXT: buffer_store_dword

; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]:

; SI: [[BR1]]:
; SI-NEXT: s_and_b64 vcc, exec,
; SI-NEXT: s_cbranch_vccnz [[ENDPGM:BB[0-9]+_[0-9]+]]
; SI: s_branch [[INFLOOP]]
; SI-NEXT: [[BR2]]:
; SI: s_cbranch_vccz [[ENDPGM]]

; SI: [[ENDPGM]]:
; SI-NEXT: s_endpgm
define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
entry:
  %cmp = icmp sgt i32 %c0, 0
  br label %while.cond.outer

while.cond.outer:
  %tmp = load float, float addrspace(1)* undef
  br label %while.cond

while.cond:
  %cmp1 = icmp slt i32 %c1, 4
  br i1 %cmp1, label %convex.exit, label %for.cond

convex.exit:
  %or = or i1 %cmp, %cmp1
  br i1 %or, label %return, label %if.end

if.end:
  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
  br i1 %cmp2, label %if.else, label %while.cond.outer

if.else:
  store volatile i32 3, i32 addrspace(1)* undef, align 4
  br label %while.cond

for.cond:
  %cmp3 = icmp slt i32 %c3, 1000
  br i1 %cmp3, label %for.body, label %return

for.body:
  br i1 %cmp3, label %self.loop, label %if.end.2

if.end.2:
  %or.cond2 = or i1 %cmp3, %arg
  br i1 %or.cond2, label %return, label %for.cond

self.loop:
  br label %self.loop

return:
  ret void
}

declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }