; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s

; Exercises loop vectorization in situations that would normally need a
; runtime bounds test on the pointers, where the pointers involved live
; in different address spaces. At present the vectorizer may not assume
; that distinct address spaces never alias, nor that pointers taken from
; different address spaces can be compared with each other directly.
;
; Every function below is a variant of one loop; the variants differ in
; the address spaces used, in whether a global replaces a pointer
; argument, and in whether noalias is present.
;
;void foo(int addrspace(N)* [noalias] a, int addrspace(M)* [noalias] b, int n)
;{
;    for (int i = 0; i < n; ++i)
;    {
;        a[i] = 3 * b[i];
;    }
;}

; Artificial datalayout: default pointers are 32 bits wide, while
; addrspace(1) pointers are 16 bits wide.
target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"


; Identified objects used by the global-based variants of the loop.
@g_as1 = common addrspace(1) global [1024 x i32] zeroinitializer, align 16
@q_as2 = common addrspace(2) global [1024 x i32] zeroinitializer, align 16

; Neither parameter is an identified object, but both share one address
; space, so this loop is expected to vectorize as usual.
define void @foo(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK-LABEL: @foo(
; CHECK: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; a[i] = 3 * b[i], with both a and b in addrspace(1)
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32 addrspace(1)* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds i32 addrspace(1)* %a, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; The two pointer parameters are unidentified objects in different
; address spaces, so the loop must not be vectorized.
define void @bar0(i32* %a, i32 addrspace(1)* %b, i32 %n) #0 {
; CHECK-LABEL: @bar0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; load from addrspace(1), store to the default address space
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32 addrspace(1)* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds i32* %a, i64 %dst.idx
  store i32 %tripled, i32* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; Same as @bar0 with the address spaces of the two arguments exchanged;
; the outcome is expected to be the same.
define void @bar1(i32 addrspace(1)* %a, i32* %b, i32 %n) #0 {
; CHECK-LABEL: @bar1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; load from the default address space, store to addrspace(1)
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds i32* %b, i64 %src.idx
  %val = load i32* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds i32 addrspace(1)* %a, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; With noalias on both arguments, vectorization should still happen even
; though the address spaces differ.
define void @bar2(i32* noalias %a, i32 addrspace(1)* noalias %b, i32 %n) #0 {
; CHECK-LABEL: @bar2(
; CHECK: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; load from addrspace(1), store to the default address space
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds i32 addrspace(1)* %b, i64 %src.idx
  %val = load i32 addrspace(1)* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds i32* %a, i64 %dst.idx
  store i32 %tripled, i32* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; The store targets an identified global that lives in a different
; address space from the loaded pointer. That is not safe in general, so
; the loop should not be vectorized.
define void @arst0(i32* %b, i32 %n) #0 {
; CHECK-LABEL: @arst0(
; CHECK-NOT: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; g_as1[i] = 3 * b[i]
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds i32* %b, i64 %src.idx
  %val = load i32* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}


; The load reads an identified global that lives in a different address
; space from the stored-to pointer. That is not safe in general, so the
; loop should not be vectorized.
define void @arst1(i32* %b, i32 %n) #0 {
; CHECK-LABEL: @arst1(
; CHECK-NOT: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; b[i] = 3 * g_as1[i]
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %src.idx
  %val = load i32 addrspace(1)* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds i32* %b, i64 %dst.idx
  store i32 %tripled, i32* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; Both the load and the store use identified globals, each in its own
; address space. This loop should be vectorized.
define void @aoeu(i32 %n) #0 {
; CHECK-LABEL: @aoeu(
; CHECK: <4 x i32>
; CHECK: ret

start:
  br label %loop.header

loop.header:                                      ; preds = %loop.body, %start
  %iv = phi i32 [ 0, %start ], [ %iv.next, %loop.body ]
  %in.range = icmp slt i32 %iv, %n
  br i1 %in.range, label %loop.body, label %done

loop.body:                                        ; preds = %loop.header
  ; g_as1[i] = 3 * q_as2[i]
  %src.idx = sext i32 %iv to i64
  %src.addr = getelementptr inbounds [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %src.idx
  %val = load i32 addrspace(2)* %src.addr, align 4
  %tripled = mul nsw i32 %val, 3
  %dst.idx = sext i32 %iv to i64
  %dst.addr = getelementptr inbounds [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %dst.idx
  store i32 %tripled, i32 addrspace(1)* %dst.addr, align 4
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

done:                                             ; preds = %loop.header
  ret void
}

; Attribute group referenced as #0 by the function definitions in this test.
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }