Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; little-endian, ELF mangling, i64 aligned to 64 bits, i128 aligned to 128 bits, native integer widths 32/64, 128-bit stack alignment
      4 
      5 ; Check vectorization on an interleaved load group of factor 2 and an interleaved
      6 ; store group of factor 2.
      7 
      8 ; int AB[1024];
      9 ; int CD[1024];
     10 ;  void test_array_load2_store2(int C, int D) {
     11 ;   for (int i = 0; i < 1024; i+=2) {
     12 ;     int A = AB[i];
     13 ;     int B = AB[i+1];
     14 ;     CD[i] = A + C;
     15 ;     CD[i+1] = B * D;
     16 ;   }
     17 ; }
     18 
     19 ; CHECK-LABEL: @test_array_load2_store2(
     20 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
     21 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
     22 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
     23 ; CHECK: add nsw <4 x i32>
     24 ; CHECK: mul nsw <4 x i32>
     25 ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
     26 ; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
     27 
     28 @AB = common global [1024 x i32] zeroinitializer, align 4 ; input array, read below at indices i and i+1
     29 @CD = common global [1024 x i32] zeroinitializer, align 4 ; output array, written below at indices i and i+1
     30 
     31 define void @test_array_load2_store2(i32 %C, i32 %D) { ; scalar loop: CD[i] = AB[i] + C; CD[i+1] = AB[i+1] * D, for i = 0, 2, ..., 1022
     32 entry:
     33   br label %for.body
     34 
     35 for.body:                                         ; preds = %for.body, %entry
     36   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 2 per iteration
     37   %arrayidx0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %indvars.iv ; &AB[i]
     38   %tmp = load i32, i32* %arrayidx0, align 4 ; AB[i]
     39   %tmp1 = or i64 %indvars.iv, 1 ; i + 1, written as `or` (i is always even: it starts at 0 and steps by 2)
     40   %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @AB, i64 0, i64 %tmp1 ; &AB[i+1]
     41   %tmp2 = load i32, i32* %arrayidx1, align 4 ; AB[i+1]
     42   %add = add nsw i32 %tmp, %C ; AB[i] + C
     43   %mul = mul nsw i32 %tmp2, %D ; AB[i+1] * D
     44   %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %indvars.iv ; &CD[i]
     45   store i32 %add, i32* %arrayidx2, align 4 ; CD[i] = AB[i] + C
     46   %arrayidx3 = getelementptr inbounds [1024 x i32], [1024 x i32]* @CD, i64 0, i64 %tmp1 ; &CD[i+1]
     47   store i32 %mul, i32* %arrayidx3, align 4 ; CD[i+1] = AB[i+1] * D
     48   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
     49   %cmp = icmp slt i64 %indvars.iv.next, 1024 ; keep looping while the next i is below 1024
     50   br i1 %cmp, label %for.body, label %for.end
     51 
     52 for.end:                                          ; preds = %for.body
     53   ret void
     54 }
     55 
     56 ; int A[3072];
     57 ; struct ST3 S[1024];
     58 ; void test_struct_array_load3_store3() {
     59 ;   int *ptr = A;
     60 ;   for (int i = 0; i < 1024; i++) {
     61 ;     int X1 = *ptr++;
     62 ;     int X2 = *ptr++;
     63 ;     int X3 = *ptr++;
     64 ;     S[i].x = X1 + 1;
     65 ;     S[i].y = X2 + 2;
     66 ;     S[i].z = X3 + 3;
     67 ;   }
     68 ; }
     69 
     70 ; CHECK-LABEL: @test_struct_array_load3_store3(
     71 ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
     72 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
     73 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
     74 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
     75 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 1, i32 1, i32 1, i32 1>
     76 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 2, i32 2, i32 2, i32 2>
     77 ; CHECK: add nsw <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
     78 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     79 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
     80 ; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
     81 ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* {{.*}}, align 4
     82 
     83 %struct.ST3 = type { i32, i32, i32 } ; three i32 fields, indexed 0..2 by the stores below
     84 @A = common global [3072 x i32] zeroinitializer, align 4 ; input: 3 consecutive i32 values are read per iteration
     85 @S = common global [1024 x %struct.ST3] zeroinitializer, align 4 ; output: one struct is written per iteration
     86 
     87 define void @test_struct_array_load3_store3() { ; scalar loop: S[i] fields 0/1/2 = next three values of A plus 1/2/3, for i = 0..1023
     88 entry:
     89   br label %for.body
     90 
     91 for.body:                                         ; preds = %for.body, %entry
     92   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 1
     93   %ptr.016 = phi i32* [ getelementptr inbounds ([3072 x i32], [3072 x i32]* @A, i64 0, i64 0), %entry ], [ %incdec.ptr2, %for.body ] ; ptr: starts at &A[0], advances by 3 elements per iteration
     94   %incdec.ptr = getelementptr inbounds i32, i32* %ptr.016, i64 1 ; ptr + 1
     95   %tmp = load i32, i32* %ptr.016, align 4 ; ptr[0]
     96   %incdec.ptr1 = getelementptr inbounds i32, i32* %ptr.016, i64 2 ; ptr + 2
     97   %tmp1 = load i32, i32* %incdec.ptr, align 4 ; ptr[1]
     98   %incdec.ptr2 = getelementptr inbounds i32, i32* %ptr.016, i64 3 ; ptr + 3, the next iteration's ptr
     99   %tmp2 = load i32, i32* %incdec.ptr1, align 4 ; ptr[2]
    100   %add = add nsw i32 %tmp, 1 ; ptr[0] + 1
    101   %x = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 0 ; &S[i], field 0
    102   store i32 %add, i32* %x, align 4 ; S[i] field 0 = ptr[0] + 1
    103   %add3 = add nsw i32 %tmp1, 2 ; ptr[1] + 2
    104   %y = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 1 ; &S[i], field 1
    105   store i32 %add3, i32* %y, align 4 ; S[i] field 1 = ptr[1] + 2
    106   %add6 = add nsw i32 %tmp2, 3 ; ptr[2] + 3
    107   %z = getelementptr inbounds [1024 x %struct.ST3], [1024 x %struct.ST3]* @S, i64 0, i64 %indvars.iv, i32 2 ; &S[i], field 2
    108   store i32 %add6, i32* %z, align 4 ; S[i] field 2 = ptr[2] + 3
    109   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i += 1
    110   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; exit once the next i reaches 1024
    111   br i1 %exitcond, label %for.end, label %for.body
    112 
    113 for.end:                                          ; preds = %for.body
    114   ret void
    115 }
    116 
    117 ; Check vectorization on an interleaved load group of factor 4.
    118 
    119 ; struct ST4{
    120 ;   int x;
    121 ;   int y;
    122 ;   int z;
    123 ;   int w;
    124 ; };
    125 ; int test_struct_load4(struct ST4 *S) {
    126 ;   int r = 0;
    127 ;   for (int i = 0; i < 1024; i++) {
    128 ;      r += S[i].x;
    129 ;      r -= S[i].y;
    130 ;      r += S[i].z;
    131 ;      r -= S[i].w;
    132 ;   }
    133 ;   return r;
    134 ; }
    135 
    136 ; CHECK-LABEL: @test_struct_load4(
    137 ; CHECK: %wide.vec = load <16 x i32>, <16 x i32>* {{.*}}, align 4
    138 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
    139 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
    140 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
    141 ; CHECK: shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
    142 ; CHECK: add nsw <4 x i32>
    143 ; CHECK: sub <4 x i32>
    144 ; CHECK: add nsw <4 x i32>
    145 ; CHECK: sub <4 x i32>
    146 
    147 %struct.ST4 = type { i32, i32, i32, i32 } ; four i32 fields, indexed 0..3 by the accesses in this file
    148 
    149 define i32 @test_struct_load4(%struct.ST4* nocapture readonly %S) { ; scalar reduction: r = r + field0 - field1 + field2 - field3 over S[0..1023]; returns r
    150 entry:
    151   br label %for.body
    152 
    153 for.body:                                         ; preds = %for.body, %entry
    154   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 1
    155   %r.022 = phi i32 [ 0, %entry ], [ %sub8, %for.body ] ; running result r, starts at 0
    156   %x = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 0 ; &S[i], field 0
    157   %tmp = load i32, i32* %x, align 4 ; S[i] field 0
    158   %add = add nsw i32 %tmp, %r.022 ; r + field 0
    159   %y = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 1 ; &S[i], field 1
    160   %tmp1 = load i32, i32* %y, align 4 ; S[i] field 1
    161   %sub = sub i32 %add, %tmp1 ; ... - field 1
    162   %z = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 2 ; &S[i], field 2
    163   %tmp2 = load i32, i32* %z, align 4 ; S[i] field 2
    164   %add5 = add nsw i32 %sub, %tmp2 ; ... + field 2
    165   %w = getelementptr inbounds %struct.ST4, %struct.ST4* %S, i64 %indvars.iv, i32 3 ; &S[i], field 3
    166   %tmp3 = load i32, i32* %w, align 4 ; S[i] field 3
    167   %sub8 = sub i32 %add5, %tmp3 ; ... - field 3; this is the value of r carried to the next iteration
    168   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i += 1
    169   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; exit once the next i reaches 1024
    170   br i1 %exitcond, label %for.end, label %for.body
    171 
    172 for.end:                                          ; preds = %for.body
    173   ret i32 %sub8 ; final value of r
    174 }
    175 
    176 ; Check vectorization on an interleaved store group of factor 4.
    177 
    178 ; void test_struct_store4(int *A, struct ST4 *B) {
    179 ;   int *ptr = A;
    180 ;   for (int i = 0; i < 1024; i++) {
    181 ;     int X = *ptr++;
    182 ;     B[i].x = X + 1;
    183 ;     B[i].y = X * 2;
    184 ;     B[i].z = X + 3;
    185 ;     B[i].w = X + 4;
    186 ;   }
    187 ; }
    188 
    189 ; CHECK-LABEL: @test_struct_store4(
    190 ; CHECK: %[[LD:.*]] = load <4 x i32>, <4 x i32>* 
    191 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
    192 ; CHECK: shl nsw <4 x i32> %[[LD]], <i32 1, i32 1, i32 1, i32 1>
    193 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 3, i32 3, i32 3, i32 3>
    194 ; CHECK: add nsw <4 x i32> %[[LD]], <i32 4, i32 4, i32 4, i32 4>
    195 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    196 ; CHECK: shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    197 ; CHECK: %interleaved.vec = shufflevector <8 x i32> {{.*}}, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
    198 ; CHECK: store <16 x i32> %interleaved.vec, <16 x i32>* {{.*}}, align 4
    199 
    200 define void @test_struct_store4(i32* noalias nocapture readonly %A, %struct.ST4* noalias nocapture %B) { ; scalar loop: X = A[i]; B[i] fields 0..3 = X + 1, X << 1, X + 3, X + 4, for i = 0..1023
    201 entry:
    202   br label %for.body
    203 
    204 for.cond.cleanup:                                 ; preds = %for.body
    205   ret void
    206 
    207 for.body:                                         ; preds = %for.body, %entry
    208   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 1
    209   %ptr.024 = phi i32* [ %A, %entry ], [ %incdec.ptr, %for.body ] ; ptr: starts at A, advances by 1 element per iteration
    210   %incdec.ptr = getelementptr inbounds i32, i32* %ptr.024, i64 1 ; ptr + 1, the next iteration's ptr
    211   %tmp = load i32, i32* %ptr.024, align 4 ; X = *ptr
    212   %add = add nsw i32 %tmp, 1 ; X + 1
    213   %x = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 0 ; &B[i], field 0
    214   store i32 %add, i32* %x, align 4 ; B[i] field 0 = X + 1
    215   %mul = shl nsw i32 %tmp, 1 ; X << 1, i.e. X * 2 in shift form
    216   %y = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 1 ; &B[i], field 1
    217   store i32 %mul, i32* %y, align 4 ; B[i] field 1 = X << 1
    218   %add3 = add nsw i32 %tmp, 3 ; X + 3
    219   %z = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 2 ; &B[i], field 2
    220   store i32 %add3, i32* %z, align 4 ; B[i] field 2 = X + 3
    221   %add6 = add nsw i32 %tmp, 4 ; X + 4
    222   %w = getelementptr inbounds %struct.ST4, %struct.ST4* %B, i64 %indvars.iv, i32 3 ; &B[i], field 3
    223   store i32 %add6, i32* %w, align 4 ; B[i] field 3 = X + 4
    224   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i += 1
    225   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; exit once the next i reaches 1024
    226   br i1 %exitcond, label %for.cond.cleanup, label %for.body
    227 }
    228 
    229 ; Check vectorization on a reverse interleaved load group of factor 2 and
    230 ; a reverse interleaved store group of factor 2.
    231 
    232 ; struct ST2 {
    233 ;  int x;
    234 ;  int y;
    235 ; };
    236 ;
    237 ; void test_reversed_load2_store2(struct ST2 *A, struct ST2 *B) {
    238 ;   for (int i = 1023; i >= 0; i--) {
    239 ;     int a = A[i].x + i;  // interleaved load of index 0
    240 ;     int b = A[i].y - i;  // interleaved load of index 1
    241 ;     B[i].x = a;          // interleaved store of index 0
    242 ;     B[i].y = b;          // interleaved store of index 1
    243 ;   }
    244 ; }
    245 
    246 ; CHECK-LABEL: @test_reversed_load2_store2(
    247 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
    248 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    249 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    250 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    251 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    252 ; CHECK: add nsw <4 x i32>
    253 ; CHECK: sub nsw <4 x i32>
    254 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    255 ; CHECK: shufflevector <4 x i32> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    256 ; CHECK: %interleaved.vec = shufflevector <4 x i32> {{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
    257 ; CHECK: store <8 x i32> %interleaved.vec, <8 x i32>* %{{.*}}, align 4
    258 
    259 %struct.ST2 = type { i32, i32 } ; two i32 fields, indexed 0 and 1 below
    260 
    261 define void @test_reversed_load2_store2(%struct.ST2* noalias nocapture readonly %A, %struct.ST2* noalias nocapture %B) { ; scalar loop counting down: B[i] field 0 = A[i] field 0 + i; B[i] field 1 = A[i] field 1 - i, for i = 1023..0
    262 entry:
    263   br label %for.body
    264 
    265 for.cond.cleanup:                                 ; preds = %for.body
    266   ret void
    267 
    268 for.body:                                         ; preds = %for.body, %entry
    269   %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 1023, decreases by 1
    270   %x = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 0 ; &A[i], field 0
    271   %tmp = load i32, i32* %x, align 4 ; A[i] field 0
    272   %tmp1 = trunc i64 %indvars.iv to i32 ; i narrowed to i32 for the arithmetic below
    273   %add = add nsw i32 %tmp, %tmp1 ; A[i] field 0 + i
    274   %y = getelementptr inbounds %struct.ST2, %struct.ST2* %A, i64 %indvars.iv, i32 1 ; &A[i], field 1
    275   %tmp2 = load i32, i32* %y, align 4 ; A[i] field 1
    276   %sub = sub nsw i32 %tmp2, %tmp1 ; A[i] field 1 - i
    277   %x5 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 0 ; &B[i], field 0
    278   store i32 %add, i32* %x5, align 4 ; B[i] field 0 = A[i] field 0 + i
    279   %y8 = getelementptr inbounds %struct.ST2, %struct.ST2* %B, i64 %indvars.iv, i32 1 ; &B[i], field 1
    280   store i32 %sub, i32* %y8, align 4 ; B[i] field 1 = A[i] field 1 - i
    281   %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; i -= 1
    282   %cmp = icmp sgt i64 %indvars.iv, 0 ; tests the pre-decrement i, so the iteration with i == 0 is the last one executed
    283   br i1 %cmp, label %for.body, label %for.cond.cleanup
    284 }
    285 
    286 ; Check vectorization on an interleaved load group of factor 2 with 1 gap
    287 ; (missing the load of odd elements).
    288 
    289 ; void even_load(int *A, int *B) {
    290 ;  for (unsigned i = 0; i < 1024; i+=2)
    291 ;     B[i/2] = A[i] * 2;
    292 ; }
    293 
    294 ; CHECK-LABEL: @even_load(
    295 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
    296 ; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    297 ; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    298 ; CHECK: shl nsw <4 x i32> %strided.vec, <i32 1, i32 1, i32 1, i32 1>
    299 
    300 define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { ; scalar loop: B[i/2] = A[i] << 1, for i = 0, 2, ..., 1022 (odd elements of A are never read)
    301 entry:
    302   br label %for.body
    303 
    304 for.cond.cleanup:                                 ; preds = %for.body
    305   ret void
    306 
    307 for.body:                                         ; preds = %for.body, %entry
    308   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 2
    309   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv ; &A[i]
    310   %tmp = load i32, i32* %arrayidx, align 4 ; A[i]
    311   %mul = shl nsw i32 %tmp, 1 ; A[i] << 1, i.e. A[i] * 2 in shift form
    312   %tmp1 = lshr exact i64 %indvars.iv, 1 ; i / 2 (`exact`: no set bit is shifted out, consistent with i being even)
    313   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1 ; &B[i/2]
    314   store i32 %mul, i32* %arrayidx2, align 4 ; B[i/2] = A[i] << 1
    315   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
    316   %cmp = icmp ult i64 %indvars.iv.next, 1024 ; keep looping while the next i is below 1024 (unsigned compare)
    317   br i1 %cmp, label %for.body, label %for.cond.cleanup
    318 }
    319 
    320 ; Check vectorization on interleaved access groups identified from mixed
    321 ; loads/stores.
    322 ; void mixed_load2_store2(int *A, int *B) {
    323 ;   for (unsigned i = 0; i < 1024; i+=2)  {
    324 ;     B[i] = A[i] * A[i+1];
    325 ;     B[i+1] = A[i] + A[i+1];
    326 ;   }
    327 ; }
    328 
    329 ; CHECK-LABEL: @mixed_load2_store2(
    330 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* {{.*}}, align 4
    331 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    332 ; CHECK: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    333 ; CHECK: %interleaved.vec = shufflevector <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
    334 ; CHECK: store <8 x i32> %interleaved.vec
    335 
    336 define void @mixed_load2_store2(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) { ; scalar loop: B[i] = A[i+1] * A[i]; B[i+1] = A[i+1] + A[i], for i = 0, 2, ..., 1022, with loads and stores interleaved in program order
    337 entry:
    338   br label %for.body
    339 
    340 for.cond.cleanup:                                 ; preds = %for.body
    341   ret void
    342 
    343 for.body:                                         ; preds = %for.body, %entry
    344   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 2
    345   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv ; &A[i]
    346   %tmp = load i32, i32* %arrayidx, align 4 ; A[i]
    347   %tmp1 = or i64 %indvars.iv, 1 ; i + 1, written as `or` (i is always even: it starts at 0 and steps by 2)
    348   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp1 ; &A[i+1]
    349   %tmp2 = load i32, i32* %arrayidx2, align 4 ; A[i+1]
    350   %mul = mul nsw i32 %tmp2, %tmp ; A[i+1] * A[i]
    351   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv ; &B[i]
    352   store i32 %mul, i32* %arrayidx4, align 4 ; B[i] = A[i+1] * A[i]
    353   %tmp3 = load i32, i32* %arrayidx, align 4 ; A[i] again, loaded after the store to B[i]
    354   %tmp4 = load i32, i32* %arrayidx2, align 4 ; A[i+1] again, loaded after the store to B[i]
    355   %add10 = add nsw i32 %tmp4, %tmp3 ; A[i+1] + A[i]
    356   %arrayidx13 = getelementptr inbounds i32, i32* %B, i64 %tmp1 ; &B[i+1]
    357   store i32 %add10, i32* %arrayidx13, align 4 ; B[i+1] = A[i+1] + A[i]
    358   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 ; i += 2
    359   %cmp = icmp ult i64 %indvars.iv.next, 1024 ; keep looping while the next i is below 1024 (unsigned compare)
    360   br i1 %cmp, label %for.body, label %for.cond.cleanup
    361 }
    362 
    363 ; Check vectorization on interleaved access groups identified from mixed
    364 ; loads/stores.
    365 ; void mixed_load3_store3(int *A) {
    366 ;   for (unsigned i = 0; i < 1024; i++)  {
    367 ;     *A++ += i;
    368 ;     *A++ += i;
    369 ;     *A++ += i;
    370 ;   }
    371 ; }
    372 
    373 ; CHECK-LABEL: @mixed_load3_store3(
    374 ; CHECK: %wide.vec = load <12 x i32>, <12 x i32>* {{.*}}, align 4
    375 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
    376 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
    377 ; CHECK: shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
    378 ; CHECK: %interleaved.vec = shufflevector <8 x i32> %{{.*}}, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
    379 ; CHECK: store <12 x i32> %interleaved.vec, <12 x i32>* %{{.*}}, align 4
    380 
    381 define void @mixed_load3_store3(i32* nocapture %A) { ; scalar loop: adds i in place to three consecutive elements of A per iteration, for i = 0..1023
    382 entry:
    383   br label %for.body
    384 
    385 for.cond.cleanup:                                 ; preds = %for.body
    386   ret void
    387 
    388 for.body:                                         ; preds = %for.body, %entry
    389   %i.013 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; i: starts at 0, advances by 1
    390   %A.addr.012 = phi i32* [ %A, %entry ], [ %incdec.ptr3, %for.body ] ; p: starts at A, advances by 3 elements per iteration
    391   %incdec.ptr = getelementptr inbounds i32, i32* %A.addr.012, i64 1 ; p + 1
    392   %tmp = load i32, i32* %A.addr.012, align 4 ; p[0]
    393   %add = add i32 %tmp, %i.013 ; p[0] + i
    394   store i32 %add, i32* %A.addr.012, align 4 ; p[0] += i
    395   %incdec.ptr1 = getelementptr inbounds i32, i32* %A.addr.012, i64 2 ; p + 2
    396   %tmp1 = load i32, i32* %incdec.ptr, align 4 ; p[1]
    397   %add2 = add i32 %tmp1, %i.013 ; p[1] + i
    398   store i32 %add2, i32* %incdec.ptr, align 4 ; p[1] += i
    399   %incdec.ptr3 = getelementptr inbounds i32, i32* %A.addr.012, i64 3 ; p + 3, the next iteration's p
    400   %tmp2 = load i32, i32* %incdec.ptr1, align 4 ; p[2]
    401   %add4 = add i32 %tmp2, %i.013 ; p[2] + i
    402   store i32 %add4, i32* %incdec.ptr1, align 4 ; p[2] += i
    403   %inc = add nuw nsw i32 %i.013, 1 ; i += 1
    404   %exitcond = icmp eq i32 %inc, 1024 ; exit once the next i reaches 1024
    405   br i1 %exitcond, label %for.cond.cleanup, label %for.body
    406 }
    407 
    408 ; Check vectorization on interleaved access groups whose members have different
    409 ; element types (here i32 and float).
    410 
    411 ; struct IntFloat {
    412 ;   int a;
    413 ;   float b;
    414 ; };
    415 ; 
    416 ; int SA;
    417 ; float SB;
    418 ;
    419 ; void int_float_struct(struct IntFloat *A) {
    420 ;   int SumA;
    421 ;   float SumB;
    422 ;   for (unsigned i = 0; i < 1024; i++)  {
    423 ;     SumA += A[i].a;
    424 ;     SumB += A[i].b;
    425 ;   }
    426 ;   SA = SumA;
    427 ;   SB = SumB;
    428 ; }
    429 
    430 ; CHECK-LABEL: @int_float_struct(
    431 ; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
    432 ; CHECK: %[[V0:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    433 ; CHECK: %[[V1:.*]] = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    434 ; CHECK: bitcast <4 x i32> %[[V1]] to <4 x float>
    435 ; CHECK: add nsw <4 x i32>
    436 ; CHECK: fadd fast <4 x float>
    437 
    438 %struct.IntFloat = type { i32, float } ; field 0 is an i32, field 1 is a float
    439 
    440 @SA = common global i32 0, align 4 ; receives the final integer sum
    441 @SB = common global float 0.000000e+00, align 4 ; receives the final float sum
    442 
    443 define void @int_float_struct(%struct.IntFloat* nocapture readonly %A) #0 { ; scalar loop: accumulates field 0 (i32) and field 1 (float) of A[0..1023], then stores the sums to @SA and @SB
    444 entry:
    445   br label %for.body
    446 
    447 for.cond.cleanup:                                 ; preds = %for.body
    448   store i32 %add, i32* @SA, align 4 ; SA = integer sum
    449   store float %add3, float* @SB, align 4 ; SB = float sum
    450   ret void
    451 
    452 for.body:                                         ; preds = %for.body, %entry
    453   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i: starts at 0, advances by 1
    454   %SumB.014 = phi float [ undef, %entry ], [ %add3, %for.body ] ; float accumulator; its initial value is undef
    455   %SumA.013 = phi i32 [ undef, %entry ], [ %add, %for.body ] ; integer accumulator; its initial value is undef
    456   %a = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 0 ; &A[i], field 0
    457   %tmp = load i32, i32* %a, align 4 ; A[i] field 0
    458   %add = add nsw i32 %tmp, %SumA.013 ; integer sum += A[i] field 0
    459   %b = getelementptr inbounds %struct.IntFloat, %struct.IntFloat* %A, i64 %indvars.iv, i32 1 ; &A[i], field 1
    460   %tmp1 = load float, float* %b, align 4 ; A[i] field 1
    461   %add3 = fadd fast float %SumB.014, %tmp1 ; float sum += A[i] field 1 (carries the `fast` flag)
    462   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i += 1
    463   %exitcond = icmp eq i64 %indvars.iv.next, 1024 ; exit once the next i reaches 1024
    464   br i1 %exitcond, label %for.cond.cleanup, label %for.body
    465 }
    466 
    467 attributes #0 = { "unsafe-fp-math"="true" } ; attribute group #0, referenced by @int_float_struct
    468