; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s

; Code generation checks for kernels whose control flow depends on i1 values.
; Each kernel is preceded by the assembly patterns FileCheck expects to find in
; the llc output for that kernel.

declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; SI-LABEL: @test_if
; Make sure the i1 values created by the cfg structurizer pass are
; moved using VALU instructions
; Register indices are matched with [0-9]+ so that multi-digit registers
; (for example s[10:11] or v12) are covered by both patterns.
; SI-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], -1
; SI: v_mov_b32_e32 v{{[0-9]+}}, -1
define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
entry:
  ; Three-way dispatch on %a; every path stores a distinct constant to %dst[%b].
  switch i32 %a, label %default [
    i32 0, label %case0
    i32 1, label %case1
  ]

case0:
  %arrayidx1 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
  store i32 0, i32 addrspace(1)* %arrayidx1, align 4
  br label %end

case1:
  %arrayidx5 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
  store i32 1, i32 addrspace(1)* %arrayidx5, align 4
  br label %end

default:
  ; Nested two-way branch inside the default case.
  %cmp8 = icmp eq i32 %a, 2
  %arrayidx10 = getelementptr i32, i32 addrspace(1)* %dst, i32 %b
  br i1 %cmp8, label %if, label %else

if:
  store i32 2, i32 addrspace(1)* %arrayidx10, align 4
  br label %end

else:
  store i32 3, i32 addrspace(1)* %arrayidx10, align 4
  br label %end

end:
  ret void
}

; A single branch on a condition derived from the work-item id.
; SI-LABEL: @simple_test_v_if
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]

; SI: ; BB#1:
; SI: buffer_store_dword
; SI: s_endpgm

; SI: BB1_2:
; SI: s_or_b64 exec, exec, [[BR_SREG]]
; SI: s_endpgm
define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %is.0 = icmp ne i32 %tid, 0
  br i1 %is.0, label %store, label %exit

store:
  ; Only work-items with a non-zero id store to %dst[%tid].
  %gep = getelementptr i32, i32 addrspace(1)* %dst, i32 %tid
  store i32 999, i32 addrspace(1)* %gep
  ret void

exit:
  ret void
}

; A loop that is entered only when the work-item id is non-zero.
; SI-LABEL: @simple_test_v_loop
; SI: v_cmp_ne_i32_e32 vcc, 0, v{{[0-9]+}}
; SI: s_and_saveexec_b64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
; SI: s_cbranch_execz BB2_2

; SI: ; BB#1:
; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}

; SI: BB2_3:
; SI: buffer_load_dword
; SI-DAG: buffer_store_dword
; SI-DAG: v_cmp_eq_i32_e32 vcc,
; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
; SI: s_cbranch_execnz BB2_3

define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
entry:
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %is.0 = icmp ne i32 %tid, 0
  ; The loop counter starts at %tid and the loop exits when it reaches %tid + 64.
  %limit = add i32 %tid, 64
  br i1 %is.0, label %loop, label %exit

loop:
  %i = phi i32 [%tid, %entry], [%i.inc, %loop]
  ; NOTE(review): %gep.src is computed but never used; the load below reads
  ; %src directly. Presumably the checks above were written against this exact
  ; code, so it is left untouched - confirm before changing.
  %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
  %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
  %load = load i32, i32 addrspace(1)* %src
  store i32 %load, i32 addrspace(1)* %gep.dst
  %i.inc = add nsw i32 %i, 1
  %cmp = icmp eq i32 %limit, %i.inc
  br i1 %cmp, label %exit, label %loop

exit:
  ret void
}

; SI-LABEL: @multi_vcond_loop

; Load loop limit from buffer
; Branch to exit if uniformly not taken
; SI: ; BB#0:
; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
; SI: v_cmp_lt_i32_e32 vcc
; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]], vcc
; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
; SI: s_cbranch_execz BB3_2

; Initialize inner condition to false
; SI: ; BB#1:
; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]

; Clear exec bits for workitems that load -1s
; SI: BB3_3:
; SI: buffer_load_dword [[B:v[0-9]+]]
; SI: buffer_load_dword [[A:v[0-9]+]]
; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
; SI: s_cbranch_execz BB3_5

; SI: BB#4:
; SI: buffer_store_dword
; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]]
; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]]

; SI: BB3_5:
; SI: s_or_b64 exec, exec, [[ORNEG2]]
; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]]
; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI: s_cbranch_execnz BB3_3

; SI: BB#6:
; SI: s_or_b64 exec, exec, [[COND_STATE]]

; SI: BB3_2:
; SI-NOT: [[COND_STATE]]
; SI: s_endpgm

define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
bb:
  ; The loop limit is loaded from %arg3 at the work-item id; the loop is
  ; skipped entirely when that limit is not positive.
  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
  %tmp4 = sext i32 %tmp to i64
  %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
  %tmp7 = icmp sgt i32 %tmp6, 0
  %tmp8 = sext i32 %tmp6 to i64
  br i1 %tmp7, label %bb10, label %bb26

bb10:                                             ; preds = %bb, %bb20
  ; Load one element from each of %arg1 and %arg2 and leave the loop as soon
  ; as either of them is -1.
  %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
  %tmp12 = add nsw i64 %tmp11, %tmp4
  %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
  %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
  %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
  %tmp17 = icmp ne i32 %tmp14, -1
  %tmp18 = icmp ne i32 %tmp16, -1
  %tmp19 = and i1 %tmp17, %tmp18
  br i1 %tmp19, label %bb20, label %bb26

bb20:                                             ; preds = %bb10
  ; Store the sum of the two loaded values to %arg, then continue while the
  ; incremented counter is still below the loaded limit.
  %tmp21 = add nsw i32 %tmp16, %tmp14
  %tmp22 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp12
  store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
  %tmp23 = add nuw nsw i64 %tmp11, 1
  %tmp24 = icmp slt i64 %tmp23, %tmp8
  br i1 %tmp24, label %bb10, label %bb26

bb26:                                             ; preds = %bb10, %bb20, %bb
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }