; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check vectorization on an interleaved load group of factor 2 and an interleaved
; store group of factor 2.

; int AB[1024];
; int CD[1024];
; void test_array_load2_store2(int C, int D) {
;   for (int i = 0; i < 1024; i+=2) {
;     int A = AB[i];
;     int B = AB[i+1];
;     CD[i] = A + C;
;     CD[i+1] = B * D;
;   }
; }

; CHECK-LABEL: @test_array_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: add nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

@AB = common global [1024 x i32] zeroinitializer, align 4
@CD = common global [1024 x i32] zeroinitializer, align 4

define void @test_array_load2_store2(i32 %C, i32 %D) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx0, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx1, align 4
  %add = add nsw i32 %tmp, %C
  %mul = mul nsw i32 %tmp2, %D
  %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv
  store i32 %add, i32* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1
  store i32 %mul, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp slt i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.end

for.end:                                          ; preds = %for.body
  ret void
}
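
; Note (illustrative only, not a FileCheck pattern): with VF = 4 and factor 2,
; the wide load reads AB[i..i+7] as <A0, B0, A1, B1, A2, B2, A3, B3>. The mask
; <0, 2, 4, 6> extracts the even elements <A0, A1, A2, A3>, the mask
; <1, 3, 5, 7> extracts the odd elements <B0, B1, B2, B3>, and after the add
; and mul the mask <0, 4, 1, 5, 2, 6, 3, 7> re-interleaves the two results
; into the single wide store to CD.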

; int A[3072];
; struct ST3 S[1024];
; void test_struct_array_load3_store3() {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X1 = *ptr++;
;     int X2 = *ptr++;
;     int X3 = *ptr++;
;     S[i].x = X1 + 1;
;     S[i].y = X2 + 2;
;     S[i].z = X3 + 3;
;   }
; }

; CHECK-LABEL: @test_struct_array_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4

%struct.ST3 = type { i32, i32, i32 }
@A = common global [3072 x i32] zeroinitializer, align 4
@S = common global [1024 x %struct.ST3] zeroinitializer, align 4

define void @test_struct_array_load3_store3() {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1
  %tmp = load i32, i32* %ptr.016, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %add3 = add nsw i32 %tmp1, 2
  %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1
  store i32 %add3, i32* %y, align 4
  %add6 = add nsw i32 %tmp2, 3
  %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2
  store i32 %add6, i32* %z, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}
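
; Note (illustrative only): for the factor-3 store above, the three 4-element
; result vectors are first concatenated by the <0..7> mask and widened with
; undef lanes by the <0, 1, 2, 3, undef, ...> mask; the final 12-element mask
; <0, 4, 8, 1, 5, 9, ...> then picks one x, one y and one z value in turn, so
; every S[i] is written by a single wide <12 x i32> store.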

; Check vectorization on an interleaved load group of factor 4.

; struct ST4 {
;   int x;
;   int y;
;   int z;
;   int w;
; };
; int test_struct_load4(struct ST4 *S) {
;   int r = 0;
;   for (int i = 0; i < 1024; i++) {
;     r += S[i].x;
;     r -= S[i].y;
;     r += S[i].z;
;     r -= S[i].w;
;   }
;   return r;
; }

; CHECK-LABEL: @test_struct_load4(
; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: sub <4 x i32>

%struct.ST4 = type { i32, i32, i32, i32 }

define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ]
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %add = add nsw i32 %tmp, %r.022
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1
  %tmp1 = load i32, i32* %y, align 4
  %sub = sub i32 %add, %tmp1
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2
  %tmp2 = load i32, i32* %z, align 4
  %add5 = add nsw i32 %sub, %tmp2
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3
  %tmp3 = load i32, i32* %w, align 4
  %sub8 = sub i32 %add5, %tmp3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 %sub8
}

; Check vectorization on an interleaved store group of factor 4.

; void test_struct_store4(int *A, struct ST4 *B) {
;   int *ptr = A;
;   for (int i = 0; i < 1024; i++) {
;     int X = *ptr++;
;     B[i].x = X + 1;
;     B[i].y = X * 2;
;     B[i].z = X + 3;
;     B[i].w = X + 4;
;   }
; }

; CHECK-LABEL: @test_struct_store4(
; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>*
; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4

define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1
  %tmp = load i32, i32* %ptr.024, align 4
  %add = add nsw i32 %tmp, 1
  %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x, align 4
  %mul = shl nsw i32 %tmp, 1
  %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1
  store i32 %mul, i32* %y, align 4
  %add3 = add nsw i32 %tmp, 3
  %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2
  store i32 %add3, i32* %z, align 4
  %add6 = add nsw i32 %tmp, 4
  %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3
  store i32 %add6, i32* %w, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
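
; Note (illustrative only): for the factor-4 store above, the four 4-element
; results are concatenated pairwise into two <8 x i32> vectors by the <0..7>
; masks, and the 16-element mask <0, 4, 8, 12, 1, 5, 9, 13, ...> interleaves
; them so that the x, y, z and w fields of each B[i] are adjacent in the
; single <16 x i32> store.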

; Check vectorization on a reverse interleaved load group of factor 2 and
; a reverse interleaved store group of factor 2.

; struct ST2 {
;   int x;
;   int y;
; };
;
; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
;   for (int i = 1023; i >= 0; i--) {
;     int a = A[i].x + i;  // interleaved load of index 0
;     int b = A[i].y - i;  // interleaved load of index 1
;     B[i].x = a;          // interleaved store of index 0
;     B[i].y = b;          // interleaved store of index 1
;   }
; }

; CHECK-LABEL: @test_reversed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: add nsw <4 x i32>
; CHECK: sub nsw <4 x i32>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4

%struct.ST2 = type { i32, i32 }

define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ]
  %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %x, align 4
  %tmp1 = trunc i64 %indvars.iv to i32
  %add = add nsw i32 %tmp, %tmp1
  %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1
  %tmp2 = load i32, i32* %y, align 4
  %sub = sub nsw i32 %tmp2, %tmp1
  %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0
  store i32 %add, i32* %x5, align 4
  %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1
  store i32 %sub, i32* %y8, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  %cmp = icmp sgt i64 %indvars.iv, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}
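
; Note (illustrative only): because this loop counts down, each de-interleaved
; vector is reversed with the <3, 2, 1, 0> mask before the add/sub, and the
; results are reversed again before being re-interleaved for the wide store.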

; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements).

; void even_load(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2)
;     B[i/2] = A[i] * 2;
; }

; CHECK-LABEL: @even_load(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>

define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %mul = shl nsw i32 %tmp, 1
  %tmp1 = lshr exact i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %mul, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}
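
; Note (illustrative only): only the even elements of A are used, so the load
; group has a gap; the wide <8 x i32> load is still emitted, but the CHECK-NOT
; above requires that no shuffle is generated for the unused odd elements.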

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

; void mixed_load2_store2(int *A, int *B) {
;   for (unsigned i = 0; i < 1024; i+=2) {
;     B[i] = A[i] * A[i+1];
;     B[i+1] = A[i] + A[i+1];
;   }
; }

; CHECK-LABEL: @mixed_load2_store2(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; CHECK: store <8 x i32> %interleaved.vec

define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %tmp = load i32, i32* %arrayidx, align 4
  %tmp1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1
  %tmp2 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %tmp2, %tmp
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %mul, i32* %arrayidx4, align 4
  %tmp3 = load i32, i32* %arrayidx, align 4
  %tmp4 = load i32, i32* %arrayidx2, align 4
  %add10 = add nsw i32 %tmp4, %tmp3
  %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1
  store i32 %add10, i32* %arrayidx13, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, 1024
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Check vectorization on interleaved access groups identified from mixed
; loads/stores.

; void mixed_load3_store3(int *A) {
;   for (unsigned i = 0; i < 1024; i++) {
;     *A++ += i;
;     *A++ += i;
;     *A++ += i;
;   }
; }

; CHECK-LABEL: @mixed_load3_store3(
; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4

define void @mixed_load3_store3(i32* nocapture %A) {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1
  %tmp = load i32, i32* %A.addr.012, align 4
  %add = add i32 %tmp, %i.013
  store i32 %add, i32* %A.addr.012, align 4
  %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2
  %tmp1 = load i32, i32* %incdec.ptr, align 4
  %add2 = add i32 %tmp1, %i.013
  store i32 %add2, i32* %incdec.ptr, align 4
  %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3
  %tmp2 = load i32, i32* %incdec.ptr1, align 4
  %add4 = add i32 %tmp2, %i.013
  store i32 %add4, i32* %incdec.ptr1, align 4
  %inc = add nuw nsw i32 %i.013, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check vectorization on interleaved access groups with members of different
; types.

; struct IntFloat {
;   int a;
;   float b;
; };
;
; int SA;
; float SB;
;
; void int_float_struct(struct IntFloat *A) {
;   int SumA;
;   float SumB;
;   for (unsigned i = 0; i < 1024; i++) {
;     SumA += A[i].a;
;     SumB += A[i].b;
;   }
;   SA = SumA;
;   SB = SumB;
; }

; CHECK-LABEL: @int_float_struct(
; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
; CHECK: add nsw <4 x i32>
; CHECK: fadd fast <4 x float>
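
; Note (illustrative only): the group members have different element types, so
; the wide load is performed as <8 x i32>; the even lanes feed the integer sum
; directly, and the odd lanes are bitcast to <4 x float> for the fadd.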

%struct.IntFloat = type { i32, float }

@SA = common global i32 0, align 4
@SB = common global float 0.000000e+00, align 4

define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 {
entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  store i32 %add, i32* @SA, align 4
  store float %add3, float* @SB, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ]
  %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ]
  %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0
  %tmp = load i32, i32* %a, align 4
  %add = add nsw i32 %tmp, %SumA.013
  %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1
  %tmp1 = load float, float* %b, align 4
  %add3 = fadd fast float %SumB.014, %tmp1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

attributes #0 = { "unsafe-fp-math"="true" }