; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s

; Exercises -amdgpu-promote-alloca on allocas whose derived pointers reach a
; store through a phi node. The first group of kernels expects the alloca to be
; replaced by an addrspace(3) global named "<kernel>.alloca"; the remaining
; kernels expect the alloca to be left untouched.
;
; Unnamed values created by the pass are matched with %{{[0-9]+}} instead of a
; literal number so the expectations do not depend on how many unnamed
; instructions the pass happens to insert.

; Both phi inputs are GEPs into the same alloca. The GEPs, the phi and the
; store are all expected to be rewritten to addrspace(3).

; CHECK-LABEL: @branch_ptr_var_same_alloca(
; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @branch_ptr_var_same_alloca.alloca, i32 0, i32 %{{[0-9]+}}

; CHECK: if:
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a

; CHECK: else:
; CHECK: %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %b

; CHECK: endif:
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_var_same_alloca(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %else

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

else:
  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %b
  br label %endif

endif:
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; One phi input is derived from the alloca and the other is null (null is the
; second incoming value). The phi is expected to become an addrspace(3) phi
; that still has null as an incoming value.

; CHECK-LABEL: @branch_ptr_phi_alloca_null_0(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %if ], [ null, %entry ]
define amdgpu_kernel void @branch_ptr_phi_alloca_null_0(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %endif

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

endif:
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ null, %entry ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; Same as @branch_ptr_phi_alloca_null_0, but with null as the first incoming
; value of the phi.

; CHECK-LABEL: @branch_ptr_phi_alloca_null_1(
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ null, %entry ], [ %arrayidx0, %if ]
define amdgpu_kernel void @branch_ptr_phi_alloca_null_1(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %endif

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

endif:
  %phi.ptr = phi i32* [ null, %entry ], [ %arrayidx0, %if ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; A phi with a single incoming value derived from the alloca. The GEP, the phi
; and the store are expected to be rewritten to addrspace(3).

; CHECK-LABEL: @one_phi_value(
; CHECK: getelementptr inbounds [256 x [64 x i32]], [256 x [64 x i32]] addrspace(3)* @one_phi_value.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(3)* %{{[0-9]+}}, i32 0, i32 %a

; CHECK: br label %exit
; CHECK: %phi.ptr = phi i32 addrspace(3)* [ %arrayidx0, %entry ]
; CHECK: store i32 0, i32 addrspace(3)* %phi.ptr, align 4
define amdgpu_kernel void @one_phi_value(i32 %a) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %exit

exit:
  %phi.ptr = phi i32* [ %arrayidx0, %entry ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; One phi input is derived from the alloca, the other is the result of a call
; to @get_unknown_pointer. The alloca and all pointer types are expected to be
; left unchanged.

; CHECK-LABEL: @branch_ptr_alloca_unknown_obj(
; CHECK: %alloca = alloca [64 x i32], align 4

; CHECK: if:
; CHECK: %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a

; CHECK: else:
; CHECK: %arrayidx1 = call i32* @get_unknown_pointer()

; CHECK: endif:
; CHECK: %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
; CHECK: store i32 0, i32* %phi.ptr, align 4
define amdgpu_kernel void @branch_ptr_alloca_unknown_obj(i32 %a, i32 %b) #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  br i1 undef, label %if, label %else

if:
  %arrayidx0 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 %a
  br label %endif

else:
  %arrayidx1 = call i32* @get_unknown_pointer()
  br label %endif

endif:
  %phi.ptr = phi i32* [ %arrayidx0, %if ], [ %arrayidx1, %else ]
  store i32 0, i32* %phi.ptr, align 4
  ret void
}

; kernel void ptr_induction_var_same_alloca(void)
; {
;   int alloca[64];
;   int i = 0;

;   #pragma nounroll
;   for (int* p = &alloca[2], *e = &alloca[48]; p != e; ++p, ++i)
;   {
;     *p = i;
;   }
; }

; FIXME: This should be promotable. We need to use
; GetUnderlyingObjects when looking at the icmp user.

; CHECK-LABEL: @ptr_induction_var_same_alloca(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
define amdgpu_kernel void @ptr_induction_var_same_alloca() #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
  %arrayidx1 = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 48
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.09 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %p.08 = phi i32* [ %arrayidx, %entry ], [ %incdec.ptr, %for.body ]
  store i32 %i.09, i32* %p.08, align 4
  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  %cmp = icmp eq i32* %incdec.ptr, %arrayidx1
  br i1 %cmp, label %for.cond.cleanup, label %for.body
}


; extern int* get_unknown_pointer(void);

; kernel void ptr_induction_var_alloca_unknown(void)
; {
;   int alloca[64];
;   int i = 0;
;
;   for (int* p = &alloca[2], *e = get_unknown_pointer(); p != e; ++p, ++i)
;   {
;     *p = i;
;   }
; }

; The loop bound comes from @get_unknown_pointer, so the pointer induction
; variable is compared against a pointer unrelated to the alloca. The alloca is
; expected to be left unchanged.

; CHECK-LABEL: @ptr_induction_var_alloca_unknown(
; CHECK: %alloca = alloca [64 x i32], align 4
; CHECK: %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
; CHECK: %cmp = icmp eq i32* %incdec.ptr, %call
define amdgpu_kernel void @ptr_induction_var_alloca_unknown() #0 {
entry:
  %alloca = alloca [64 x i32], align 4
  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* %alloca, i32 0, i32 2
  %call = tail call i32* @get_unknown_pointer() #2
  %cmp.7 = icmp eq i32* %arrayidx, %call
  br i1 %cmp.7, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body, %for.body.preheader
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %p.08 = phi i32* [ %incdec.ptr, %for.body ], [ %arrayidx, %for.body.preheader ]
  store i32 %i.09, i32* %p.08, align 4
  %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i32 1
  %inc = add nuw nsw i32 %i.09, 1
  %cmp = icmp eq i32* %incdec.ptr, %call
  br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
}

declare i32* @get_unknown_pointer() #0

attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }