Home | History | Annotate | Download | only in LoopReroll
      1 ; RUN: opt < %s -loop-reroll -S | FileCheck %s
      2 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      3 target triple = "x86_64-unknown-linux-gnu"
        ; The module is pinned to an explicit data layout and the x86-64 Linux triple
        ; above, so the test does not depend on the host's default target.
      4 
      5 ; int foo(int a);
      6 ; void bar(int *x) {
      7 ;   for (int i = 0; i < 500; i += 3) {
      8 ;     foo(i);
      9 ;     foo(i+1);
     10 ;     foo(i+2);
     11 ;   }
     12 ; }
     13 
     14 ; Function Attrs: nounwind uwtable
     15 define void @bar(i32* nocapture readnone %x) #0 {
        ; Each iteration calls @foo on %i.08, %i.08 + 1 and %i.08 + 2, and the
        ; induction variable advances by 3. The body runs for i = 0, 3, ..., 498
        ; (167 iterations), so the expected single-call form runs 501 times and
        ; its exit compare tests %indvar against 500.
     16 entry:
     17   br label %for.body
     18 
     19 for.body:                                         ; preds = %for.body, %entry
     20   %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
     21   %call = tail call i32 @foo(i32 %i.08) #1
     22   %add = add nsw i32 %i.08, 1
     23   %call1 = tail call i32 @foo(i32 %add) #1
     24   %add2 = add nsw i32 %i.08, 2
     25   %call3 = tail call i32 @foo(i32 %add2) #1
     26   %add3 = add nsw i32 %i.08, 3
          ; Exit is a signed >= test on the already-incremented value.
     27   %exitcond = icmp sge i32 %add3, 500
     28   br i1 %exitcond, label %for.end, label %for.body
     29 
     30 ; CHECK-LABEL: @bar
     31 
     32 ; CHECK: for.body:
     33 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
     34 ; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
     35 ; CHECK: %indvar.next = add i32 %indvar, 1
     36 ; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
     37 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
     38 
     39 ; CHECK: ret
     40 
     41 for.end:                                          ; preds = %for.body
     42   ret void
     43 }
     44 
     45 declare i32 @foo(i32)
        ; @foo has no body in this module; it is the call target used by the loop
        ; tests in this file.
     46 
     47 ; void hi1(int *x) {
     48 ;   for (int i = 0; i < 1500; i += 3) {
     49 ;     x[i] = foo(0);
     50 ;     x[i+1] = foo(0);
     51 ;     x[i+2] = foo(0);
     52 ;   }
     53 ; }
     54 
     55 ; Function Attrs: nounwind uwtable
     56 define void @hi1(i32* nocapture %x) #0 {
        ; Each iteration stores the result of a separate @foo(0) call to
        ; x[iv], x[iv + 1] and x[iv + 2], and the i64 induction variable advances
        ; by 3. The loop continues while the truncated next value is < 1500
        ; (500 iterations), so the expected one-store form exits when the
        ; truncated unit-stride counter equals 1499.
     57 entry:
     58   br label %for.body
     59 
     60 for.body:                                         ; preds = %entry, %for.body
     61   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
     62   %call = tail call i32 @foo(i32 0) #1
     63   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
     64   store i32 %call, i32* %arrayidx, align 4
     65   %call1 = tail call i32 @foo(i32 0) #1
     66   %0 = add nsw i64 %indvars.iv, 1
     67   %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %0
     68   store i32 %call1, i32* %arrayidx3, align 4
     69   %call4 = tail call i32 @foo(i32 0) #1
     70   %1 = add nsw i64 %indvars.iv, 2
     71   %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %1
     72   store i32 %call4, i32* %arrayidx7, align 4
     73   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
          ; The exit test is done on a 32-bit truncation of the 64-bit counter.
     74   %2 = trunc i64 %indvars.iv.next to i32
     75   %cmp = icmp slt i32 %2, 1500
     76   br i1 %cmp, label %for.body, label %for.end
     77 
     78 ; CHECK-LABEL: @hi1
     79 
     80 ; CHECK: for.body:
     81 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
     82 ; CHECK: %0 = trunc i64 %indvar to i32
     83 ; CHECK: %call = tail call i32 @foo(i32 0) #1
     84 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar
     85 ; CHECK: store i32 %call, i32* %arrayidx, align 4
     86 ; CHECK: %indvar.next = add i64 %indvar, 1
     87 ; CHECK: %exitcond = icmp eq i32 %0, 1499
     88 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
     89 
     90 ; CHECK: ret
     91 
     92 for.end:                                          ; preds = %for.body
     93   ret void
     94 }
     95 
     96 ; void hi2(int *x) {
     97 ;   for (int i = 0; i < 500; ++i) {
     98 ;     x[3*i] = foo(0);
     99 ;     x[3*i+1] = foo(0);
    100 ;     x[3*i+2] = foo(0);
    101 ;   }
    102 ; }
    103 
    104 ; Function Attrs: nounwind uwtable
    105 define void @hi2(i32* nocapture %x) #0 {
        ; Same stores as @hi1, but the induction variable advances by 1 and the
        ; store indices are 3 * iv, 3 * iv + 1 and 3 * iv + 2. The loop runs 500
        ; times, so the expected form indexes x directly by %indvars.iv and exits
        ; when %indvars.iv equals 1499.
    106 entry:
    107   br label %for.body
    108 
    109 for.body:                                         ; preds = %for.body, %entry
    110   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    111   %call = tail call i32 @foo(i32 0) #1
          ; %0 is the base index 3 * iv shared by all three stores.
    112   %0 = mul nsw i64 %indvars.iv, 3
    113   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
    114   store i32 %call, i32* %arrayidx, align 4
    115   %call1 = tail call i32 @foo(i32 0) #1
    116   %1 = add nsw i64 %0, 1
    117   %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
    118   store i32 %call1, i32* %arrayidx4, align 4
    119   %call5 = tail call i32 @foo(i32 0) #1
    120   %2 = add nsw i64 %0, 2
    121   %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
    122   store i32 %call5, i32* %arrayidx9, align 4
    123   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    124   %exitcond = icmp eq i64 %indvars.iv.next, 500
    125   br i1 %exitcond, label %for.end, label %for.body
    126 
    127 ; CHECK-LABEL: @hi2
    128 
    129 ; CHECK: for.body:
    130 ; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    131 ; CHECK: %call = tail call i32 @foo(i32 0) #1
    132 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
    133 ; CHECK: store i32 %call, i32* %arrayidx, align 4
    134 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    135 ; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499
    136 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
    137 
    138 ; CHECK: ret
    139 
    140 for.end:                                          ; preds = %for.body
    141   ret void
    142 }
    143 
    144 ; void goo(float alpha, float *a, float *b) {
    145 ;   for (int i = 0; i < 3200; i += 5) {
    146 ;     a[i] += alpha * b[i];
    147 ;     a[i + 1] += alpha * b[i + 1];
    148 ;     a[i + 2] += alpha * b[i + 2];
    149 ;     a[i + 3] += alpha * b[i + 3];
    150 ;     a[i + 4] += alpha * b[i + 4];
    151 ;   }
    152 ; }
    153 
    154 ; Function Attrs: nounwind uwtable
    155 define void @goo(float %alpha, float* nocapture %a, float* nocapture readonly %b) #0 {
        ; Five copies of "a[k] += alpha * b[k]" per iteration, for k = iv .. iv + 4,
        ; with the induction variable advancing by 5. The loop continues while the
        ; truncated next value is < 3200 (640 iterations), so the expected
        ; one-copy form exits when the truncated unit-stride counter equals 3199.
    156 entry:
    157   br label %for.body
    158 
    159 for.body:                                         ; preds = %entry, %for.body
    160   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
          ; Copy for offset +0.
    161   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
    162   %0 = load float, float* %arrayidx, align 4
    163   %mul = fmul float %0, %alpha
    164   %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
    165   %1 = load float, float* %arrayidx2, align 4
    166   %add = fadd float %1, %mul
    167   store float %add, float* %arrayidx2, align 4
          ; Copy for offset +1.
    168   %2 = add nsw i64 %indvars.iv, 1
    169   %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
    170   %3 = load float, float* %arrayidx5, align 4
    171   %mul6 = fmul float %3, %alpha
    172   %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
    173   %4 = load float, float* %arrayidx9, align 4
    174   %add10 = fadd float %4, %mul6
    175   store float %add10, float* %arrayidx9, align 4
          ; Copy for offset +2.
    176   %5 = add nsw i64 %indvars.iv, 2
    177   %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
    178   %6 = load float, float* %arrayidx13, align 4
    179   %mul14 = fmul float %6, %alpha
    180   %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
    181   %7 = load float, float* %arrayidx17, align 4
    182   %add18 = fadd float %7, %mul14
    183   store float %add18, float* %arrayidx17, align 4
          ; Copy for offset +3.
    184   %8 = add nsw i64 %indvars.iv, 3
    185   %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
    186   %9 = load float, float* %arrayidx21, align 4
    187   %mul22 = fmul float %9, %alpha
    188   %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
    189   %10 = load float, float* %arrayidx25, align 4
    190   %add26 = fadd float %10, %mul22
    191   store float %add26, float* %arrayidx25, align 4
          ; Copy for offset +4.
    192   %11 = add nsw i64 %indvars.iv, 4
    193   %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
    194   %12 = load float, float* %arrayidx29, align 4
    195   %mul30 = fmul float %12, %alpha
    196   %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
    197   %13 = load float, float* %arrayidx33, align 4
    198   %add34 = fadd float %13, %mul30
    199   store float %add34, float* %arrayidx33, align 4
    200   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
          ; The exit test is done on a 32-bit truncation of the 64-bit counter.
    201   %14 = trunc i64 %indvars.iv.next to i32
    202   %cmp = icmp slt i32 %14, 3200
    203   br i1 %cmp, label %for.body, label %for.end
    204 
    205 ; CHECK-LABEL: @goo
    206 
    207 ; CHECK: for.body:
    208 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
    209 ; CHECK: %0 = trunc i64 %indvar to i32
    210 ; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
    211 ; CHECK: %1 = load float, float* %arrayidx, align 4
    212 ; CHECK: %mul = fmul float %1, %alpha
    213 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
    214 ; CHECK: %2 = load float, float* %arrayidx2, align 4
    215 ; CHECK: %add = fadd float %2, %mul
    216 ; CHECK: store float %add, float* %arrayidx2, align 4
    217 ; CHECK: %indvar.next = add i64 %indvar, 1
    218 ; CHECK: %exitcond = icmp eq i32 %0, 3199
    219 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
    220 
    221 ; CHECK: ret
    222 
    223 for.end:                                          ; preds = %for.body
    224   ret void
    225 }
    226 
    227 ; void hoo(float alpha, float *a, float *b, int *ip) {
    228 ;   for (int i = 0; i < 3200; i += 5) {
    229 ;     a[i] += alpha * b[ip[i]];
    230 ;     a[i + 1] += alpha * b[ip[i + 1]];
    231 ;     a[i + 2] += alpha * b[ip[i + 2]];
    232 ;     a[i + 3] += alpha * b[ip[i + 3]];
    233 ;     a[i + 4] += alpha * b[ip[i + 4]];
    234 ;   }
    235 ; }
    236 
    237 ; Function Attrs: nounwind uwtable
    238 define void @hoo(float %alpha, float* nocapture %a, float* nocapture readonly %b, i32* nocapture readonly %ip) #0 {
        ; Like @goo, but each copy reads b through an index loaded from %ip:
        ; "a[k] += alpha * b[ip[k]]" for k = iv .. iv + 4, with the induction
        ; variable advancing by 5. The loop continues while the truncated next
        ; value is < 3200 (640 iterations), so the expected one-copy form exits
        ; when the truncated unit-stride counter equals 3199.
    239 entry:
    240   br label %for.body
    241 
    242 for.body:                                         ; preds = %entry, %for.body
    243   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
          ; Copy for offset +0: load ip[k], sign-extend it, then use it to index b.
    244   %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvars.iv
    245   %0 = load i32, i32* %arrayidx, align 4
    246   %idxprom1 = sext i32 %0 to i64
    247   %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
    248   %1 = load float, float* %arrayidx2, align 4
    249   %mul = fmul float %1, %alpha
    250   %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
    251   %2 = load float, float* %arrayidx4, align 4
    252   %add = fadd float %2, %mul
    253   store float %add, float* %arrayidx4, align 4
          ; Copy for offset +1.
    254   %3 = add nsw i64 %indvars.iv, 1
    255   %arrayidx7 = getelementptr inbounds i32, i32* %ip, i64 %3
    256   %4 = load i32, i32* %arrayidx7, align 4
    257   %idxprom8 = sext i32 %4 to i64
    258   %arrayidx9 = getelementptr inbounds float, float* %b, i64 %idxprom8
    259   %5 = load float, float* %arrayidx9, align 4
    260   %mul10 = fmul float %5, %alpha
    261   %arrayidx13 = getelementptr inbounds float, float* %a, i64 %3
    262   %6 = load float, float* %arrayidx13, align 4
    263   %add14 = fadd float %6, %mul10
    264   store float %add14, float* %arrayidx13, align 4
          ; Copy for offset +2.
    265   %7 = add nsw i64 %indvars.iv, 2
    266   %arrayidx17 = getelementptr inbounds i32, i32* %ip, i64 %7
    267   %8 = load i32, i32* %arrayidx17, align 4
    268   %idxprom18 = sext i32 %8 to i64
    269   %arrayidx19 = getelementptr inbounds float, float* %b, i64 %idxprom18
    270   %9 = load float, float* %arrayidx19, align 4
    271   %mul20 = fmul float %9, %alpha
    272   %arrayidx23 = getelementptr inbounds float, float* %a, i64 %7
    273   %10 = load float, float* %arrayidx23, align 4
    274   %add24 = fadd float %10, %mul20
    275   store float %add24, float* %arrayidx23, align 4
          ; Copy for offset +3.
    276   %11 = add nsw i64 %indvars.iv, 3
    277   %arrayidx27 = getelementptr inbounds i32, i32* %ip, i64 %11
    278   %12 = load i32, i32* %arrayidx27, align 4
    279   %idxprom28 = sext i32 %12 to i64
    280   %arrayidx29 = getelementptr inbounds float, float* %b, i64 %idxprom28
    281   %13 = load float, float* %arrayidx29, align 4
    282   %mul30 = fmul float %13, %alpha
    283   %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
    284   %14 = load float, float* %arrayidx33, align 4
    285   %add34 = fadd float %14, %mul30
    286   store float %add34, float* %arrayidx33, align 4
          ; Copy for offset +4.
    287   %15 = add nsw i64 %indvars.iv, 4
    288   %arrayidx37 = getelementptr inbounds i32, i32* %ip, i64 %15
    289   %16 = load i32, i32* %arrayidx37, align 4
    290   %idxprom38 = sext i32 %16 to i64
    291   %arrayidx39 = getelementptr inbounds float, float* %b, i64 %idxprom38
    292   %17 = load float, float* %arrayidx39, align 4
    293   %mul40 = fmul float %17, %alpha
    294   %arrayidx43 = getelementptr inbounds float, float* %a, i64 %15
    295   %18 = load float, float* %arrayidx43, align 4
    296   %add44 = fadd float %18, %mul40
    297   store float %add44, float* %arrayidx43, align 4
    298   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
          ; The exit test is done on a 32-bit truncation of the 64-bit counter.
    299   %19 = trunc i64 %indvars.iv.next to i32
    300   %cmp = icmp slt i32 %19, 3200
    301   br i1 %cmp, label %for.body, label %for.end
    302 
    303 ; CHECK-LABEL: @hoo
    304 
    305 ; CHECK: for.body:
    306 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
    307 ; CHECK: %0 = trunc i64 %indvar to i32
    308 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar
    309 ; CHECK: %1 = load i32, i32* %arrayidx, align 4
    310 ; CHECK: %idxprom1 = sext i32 %1 to i64
    311 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
    312 ; CHECK: %2 = load float, float* %arrayidx2, align 4
    313 ; CHECK: %mul = fmul float %2, %alpha
    314 ; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar
    315 ; CHECK: %3 = load float, float* %arrayidx4, align 4
    316 ; CHECK: %add = fadd float %3, %mul
    317 ; CHECK: store float %add, float* %arrayidx4, align 4
    318 ; CHECK: %indvar.next = add i64 %indvar, 1
    319 ; CHECK: %exitcond = icmp eq i32 %0, 3199
    320 ; CHECK: br i1 %exitcond, label %for.end, label %for.body
    321 
    322 ; CHECK: ret
    323 
    324 for.end:                                          ; preds = %for.body
    325   ret void
    326 }
    327 
    328 ; void multi1(int *x) {
    329 ;   int y = foo(0);
    330 ;   for (int i = 0; i < 500; ++i) {
    331 ;     x[3*i] = y;
    332 ;     x[3*i+1] = y;
    333 ;     x[3*i+2] = y;
    334 ;     x[3*i+6] = y;
    335 ;     x[3*i+7] = y;
    336 ;     x[3*i+8] = y;
    337 ;   }
    338 ; }
    339 
    340 ; Function Attrs: nounwind uwtable
    341 define void @multi1(i32* nocapture %x) #0 {
        ; Two groups of three consecutive stores of the same value %call per
        ; iteration: one at 3 * iv + {0, 1, 2} and one at 3 * iv + {6, 7, 8}.
        ; The loop runs 500 times. The expected form keeps one store per group,
        ; at %indvars.iv and at %indvars.iv + 6, and exits when %indvars.iv
        ; equals 1499.
    342 entry:
          ; @foo is called once, outside the loop; every store writes its result.
    343   %call = tail call i32 @foo(i32 0) #1
    344   br label %for.body
    345 
    346 for.body:                                         ; preds = %for.body, %entry
    347   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    348   %0 = mul nsw i64 %indvars.iv, 3
          ; First group: offsets 0, 1, 2 from the base 3 * iv.
    349   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
    350   store i32 %call, i32* %arrayidx, align 4
    351   %1 = add nsw i64 %0, 1
    352   %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
    353   store i32 %call, i32* %arrayidx4, align 4
    354   %2 = add nsw i64 %0, 2
    355   %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
    356   store i32 %call, i32* %arrayidx9, align 4
          ; Second group: offsets 6, 7, 8 from the same base.
    357   %3 = add nsw i64 %0, 6
    358   %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %3
    359   store i32 %call, i32* %arrayidx6, align 4
    360   %4 = add nsw i64 %0, 7
    361   %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %4
    362   store i32 %call, i32* %arrayidx7, align 4
    363   %5 = add nsw i64 %0, 8
    364   %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %5
    365   store i32 %call, i32* %arrayidx8, align 4
    366   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    367   %exitcond = icmp eq i64 %indvars.iv.next, 500
    368   br i1 %exitcond, label %for.end, label %for.body
    369 
    370 ; CHECK-LABEL: @multi1
    371 
    372 ; CHECK:for.body:
    373 ; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    374 ; CHECK:  %0 = add i64 %indvars.iv, 6
    375 ; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
    376 ; CHECK:  store i32 %call, i32* %arrayidx, align 4
    377 ; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
    378 ; CHECK:  store i32 %call, i32* %arrayidx6, align 4
    379 ; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    380 ; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
    381 ; CHECK:  br i1 %exitcond1, label %for.end, label %for.body
    382 
    383 for.end:                                          ; preds = %for.body
    384   ret void
    385 }
    386 
    387 ; void multi2(int *x) {
    388 ;   int y = foo(0);
    389 ;   for (int i = 0; i < 500; ++i) {
    390 ;     x[3*i] = y;
    391 ;     x[3*i+1] = y;
    392 ;     x[3*i+2] = y;
    393 ;     x[3*(i+1)] = y;
    394 ;     x[3*(i+1)+1] = y;
    395 ;     x[3*(i+1)+2] = y;
    396 ;   }
    397 ; }
    398 
    399 ; Function Attrs: nounwind uwtable
    400 define void @multi2(i32* nocapture %x) #0 {
        ; Like @multi1, but the second group's base is computed as 3 * (iv + 1)
        ; through %add and %newmul instead of as a constant offset from 3 * iv.
        ; The expected form keeps one store per group, at %indvars.iv and at
        ; %indvars.iv + 3, and exits when %indvars.iv equals 1499.
    401 entry:
          ; @foo is called once, outside the loop; every store writes its result.
    402   %call = tail call i32 @foo(i32 0) #1
    403   br label %for.body
    404 
    405 for.body:                                         ; preds = %for.body, %entry
    406   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
          ; Both group bases are computed up front, ahead of the stores.
    407   %0 = mul nsw i64 %indvars.iv, 3
    408   %add = add nsw i64 %indvars.iv, 1
    409   %newmul = mul nsw i64 %add, 3
          ; First group: offsets 0, 1, 2 from 3 * iv.
    410   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
    411   store i32 %call, i32* %arrayidx, align 4
    412   %1 = add nsw i64 %0, 1
    413   %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
    414   store i32 %call, i32* %arrayidx4, align 4
    415   %2 = add nsw i64 %0, 2
    416   %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
    417   store i32 %call, i32* %arrayidx9, align 4
          ; Second group: offsets 0, 1, 2 from 3 * (iv + 1).
    418   %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %newmul
    419   store i32 %call, i32* %arrayidx6, align 4
    420   %3 = add nsw i64 %newmul, 1
    421   %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %3
    422   store i32 %call, i32* %arrayidx7, align 4
    423   %4 = add nsw i64 %newmul, 2
    424   %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %4
    425   store i32 %call, i32* %arrayidx8, align 4
    426   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    427   %exitcond = icmp eq i64 %indvars.iv.next, 500
    428   br i1 %exitcond, label %for.end, label %for.body
    429 
    430 ; CHECK-LABEL: @multi2
    431 
    432 ; CHECK:for.body:
    433 ; CHECK:  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    434 ; CHECK:  %0 = add i64 %indvars.iv, 3
    435 ; CHECK:  %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
    436 ; CHECK:  store i32 %call, i32* %arrayidx, align 4
    437 ; CHECK:  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0
    438 ; CHECK:  store i32 %call, i32* %arrayidx6, align 4
    439 ; CHECK:  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    440 ; CHECK:  %exitcond1 = icmp eq i64 %indvars.iv, 1499
    441 ; CHECK:  br i1 %exitcond1, label %for.end, label %for.body
    442 
    443 for.end:                                          ; preds = %for.body
    444   ret void
    445 }
    446 
    447 ; void multi3(int *x) {
    448 ;   int y = foo(0);
    449 ;   for (int i = 0; i < 500; ++i) {
    450 ;     // Note: No zero index
    451 ;     x[3*i+3] = y;
    452 ;     x[3*i+4] = y;
    453 ;     x[3*i+5] = y;
    454 ;   }
    455 ; }
    456 
    457 ; Function Attrs: nounwind uwtable
    458 define void @multi3(i32* nocapture %x) #0 {
        ; A single group of three consecutive stores at 3 * iv + {3, 4, 5}; no
        ; store uses the bare base 3 * iv. The loop runs 500 times. The expected
        ; form has one store at %indvars.iv + 3 and exits when %indvars.iv
        ; equals 1499.
    459 entry:
          ; @foo is called once, outside the loop; every store writes its result.
    460   %call = tail call i32 @foo(i32 0) #1
    461   br label %for.body
    462 
    463 for.body:                                         ; preds = %for.body, %entry
    464   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    465   %0 = mul nsw i64 %indvars.iv, 3
    466   %x0 = add nsw i64 %0, 3
          ; %add has no users in this function.
    467   %add = add nsw i64 %indvars.iv, 1
    468   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %x0
    469   store i32 %call, i32* %arrayidx, align 4
    470   %1 = add nsw i64 %0, 4
    471   %arrayidx4 = getelementptr inbounds i32, i32* %x, i64 %1
    472   store i32 %call, i32* %arrayidx4, align 4
    473   %2 = add nsw i64 %0, 5
    474   %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %2
    475   store i32 %call, i32* %arrayidx9, align 4
    476   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    477   %exitcond = icmp eq i64 %indvars.iv.next, 500
    478   br i1 %exitcond, label %for.end, label %for.body
    479 
    480 ; CHECK-LABEL: @multi3
    481 ; CHECK: for.body:
    482 ; CHECK:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    483 ; CHECK:   %0 = add i64 %indvars.iv, 3
    484 ; CHECK:   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
    485 ; CHECK:   store i32 %call, i32* %arrayidx, align 4
    486 ; CHECK:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    487 ; CHECK:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
    488 ; CHECK:   br i1 %exitcond1, label %for.end, label %for.body
    489 
    490 for.end:                                          ; preds = %for.body
    491   ret void
    492 }
    493 
    494 ; int foo(int a);
    495 ; void bar2(int *x, int y, int z) {
    496 ;   for (int i = 0; i < 500; i += 3) {
    497 ;     foo(i+y+i*z); // Slightly reordered instruction order
    498 ;     foo(i+1+y+(i+1)*z);
    499 ;     foo(i+2+y+(i+2)*z);
    500 ;   }
    501 ; }
    502 
    503 ; Function Attrs: nounwind uwtable
    504 define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 {
        ; Variant of @bar where each call argument is (k + %y) + (k * %z) for
        ; k = i, i + 1, i + 2. The first copy computes the add before the mul,
        ; while the second and third copies compute the mul before the add.
        ; As in @bar, the body runs for i = 0, 3, ..., 498, so the expected
        ; single-call form exits when %indvar equals 500.
    505 entry:
    506   br label %for.body
    507 
    508 for.body:                                         ; preds = %for.body, %entry
    509   %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
    510 
          ; First copy: add first, then mul.
    511   %tmp1 = add i32 %i.08, %y
    512   %tmp2 = mul i32 %i.08, %z
    513   %tmp3 = add i32 %tmp2, %tmp1
    514   %call = tail call i32 @foo(i32 %tmp3) #1
    515 
          ; Second copy: mul first, then add.
    516   %add = add nsw i32 %i.08, 1
    517   %tmp2a = mul i32 %add, %z
    518   %tmp1a = add i32 %add, %y
    519   %tmp3a = add i32 %tmp2a, %tmp1a
    520   %calla = tail call i32 @foo(i32 %tmp3a) #1
    521   
          ; Third copy: same order as the second.
    522   %add2 = add nsw i32 %i.08, 2
    523   %tmp2b = mul i32 %add2, %z
    524   %tmp1b = add i32 %add2, %y
    525   %tmp3b = add i32 %tmp2b, %tmp1b
    526   %callb = tail call i32 @foo(i32 %tmp3b) #1
    527 
    528   %add3 = add nsw i32 %i.08, 3
    529 
    530   %exitcond = icmp sge i32 %add3, 500
    531   br i1 %exitcond, label %for.end, label %for.body
    532 
    533 ; CHECK-LABEL: @bar2
    534 
    535 ; CHECK: for.body:
    536 ; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
    537 ; CHECK: %tmp1 = add i32 %indvar, %y
    538 ; CHECK: %tmp2 = mul i32 %indvar, %z
    539 ; CHECK: %tmp3 = add i32 %tmp2, %tmp1
    540 ; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
    541 ; CHECK: %indvar.next = add i32 %indvar, 1
    542 ; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
    543 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body
    544 
    545 ; CHECK: ret
    546 
    547 for.end:                                          ; preds = %for.body
    548   ret void
    549 }
    550 
    551 %struct.s = type { i32, i32 }
        ; Aggregate of two i32 fields; @gep1 takes a pointer to it and addresses
        ; field 0 of successive elements.
    552 
    553 ; Function Attrs: nounwind uwtable
    554 define void @gep1(%struct.s* nocapture %x) #0 {
        ; Three stores per iteration to field 0 of the %struct.s elements at
        ; indices 3 * iv, 3 * iv + 1 and 3 * iv + 2, so each address comes from a
        ; two-index GEP (element index, then field index). Only the final ret is
        ; matched: per the note below, the test just requires that the pass
        ; completes on this input.
    555 entry:
    556   %call = tail call i32 @foo(i32 0) #1
    557   br label %for.body
    558 
    559 for.body:                                         ; preds = %for.body, %entry
    560   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    561   %0 = mul nsw i64 %indvars.iv, 3
    562   %arrayidx = getelementptr inbounds %struct.s, %struct.s* %x, i64 %0, i32 0
    563   store i32 %call, i32* %arrayidx, align 4
    564   %1 = add nsw i64 %0, 1
    565   %arrayidx4 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %1, i32 0
    566   store i32 %call, i32* %arrayidx4, align 4
    567   %2 = add nsw i64 %0, 2
    568   %arrayidx9 = getelementptr inbounds %struct.s, %struct.s* %x, i64 %2, i32 0
    569   store i32 %call, i32* %arrayidx9, align 4
    570   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    571   %exitcond = icmp eq i64 %indvars.iv.next, 500
    572   br i1 %exitcond, label %for.end, label %for.body
    573 
    574 ; CHECK-LABEL: @gep1
    575 ; This test is a crash test only.
    576 ; CHECK: ret
    577 for.end:                                          ; preds = %for.body
    578   ret void
    579 }
    580 
    581 define void @gep-indexing(i32* nocapture %x) {
        ; Three stores per iteration at x[3 * iv + {0, 1, 2}], where the second
        ; and third addresses are formed by constant-offset GEPs (1 and 2) off
        ; the first address %arrayidx rather than by integer adds on the index.
        ; The loop runs 500 times. The expected form is a single store through
        ; %scevgep = &x[%indvars.iv], exiting when %indvars.iv equals 1499, and
        ; the loop body is matched line by line with nothing in between.
    582 entry:
    583   %call = tail call i32 @foo(i32 0) #1
    584   br label %for.body
    585 
    586 for.body:                                         ; preds = %for.body, %entry
    587   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    588   %0 = mul nsw i64 %indvars.iv, 3
    589   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0
    590   store i32 %call, i32* %arrayidx, align 4
          ; Offsets +1 and +2 are expressed on the pointer, not on the index.
    591   %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1
    592   store i32 %call, i32* %arrayidx4, align 4
    593   %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2
    594   store i32 %call, i32* %arrayidx9, align 4
    595   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    596   %exitcond = icmp eq i64 %indvars.iv.next, 500
    597   br i1 %exitcond, label %for.end, label %for.body
    598 
    599 ; CHECK-LABEL: @gep-indexing
    600 ; CHECK:      for.body:
    601 ; CHECK-NEXT:   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    602 ; CHECK-NEXT:   %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv
    603 ; CHECK-NEXT:   store i32 %call, i32* %scevgep, align 4
    604 ; CHECK-NEXT:   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
    605 ; CHECK-NEXT:   %exitcond1 = icmp eq i64 %indvars.iv, 1499
    606 ; CHECK-NEXT:   br i1 %exitcond1, label %for.end, label %for.body
    607 
    608 for.end:                                          ; preds = %for.body
    609   ret void
    610 }
    611 
    612 
    613 define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
        ; Two copies per iteration of an element copy from %buf_0 to %buf_1, at
        ; indices iv and iv + 1, using unordered atomic loads and stores; the
        ; induction variable advances by 2 while the next value is < 3200
        ; (1600 iterations). The expected form has one unordered atomic load and
        ; store per iteration and exits when %indvar equals 3199.
    614 ; CHECK-LABEL: @unordered_atomic_ops(
    615 
    616 ; CHECK: for.body:
    617 ; CHECK-NEXT:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
    618 ; CHECK-NEXT:   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
    619 ; CHECK-NEXT:   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
    620 ; CHECK-NEXT:   %va = load atomic i32, i32* %buf0_a unordered, align 4
    621 ; CHECK-NEXT:   store atomic i32 %va, i32* %buf1_a unordered, align 4
    622 ; CHECK-NEXT:   %indvar.next = add i32 %indvar, 1
    623 ; CHECK-NEXT:   %exitcond = icmp eq i32 %indvar, 3199
    624 ; CHECK-NEXT:   br i1 %exitcond, label %for.end, label %for.body
    625 
    626 entry:
    627   br label %for.body
    628 
    629 for.body:
    630   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    631   %indvars.iv.next = add i32 %indvars.iv, 2
    632   %indvars.mid = add i32 %indvars.iv, 1
    633   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
    634   %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
    635   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
    636   %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
          ; Both loads are issued before both stores.
    637   %va = load atomic i32, i32* %buf0_a unordered, align 4
    638   %vb = load atomic i32, i32* %buf0_b unordered, align 4
    639   store atomic i32 %va, i32* %buf1_a unordered, align 4
    640   store atomic i32 %vb, i32* %buf1_b unordered, align 4
    641   %cmp = icmp slt i32 %indvars.iv.next, 3200
    642   br i1 %cmp, label %for.body, label %for.end
    643 
    644 for.end:
    645   ret void
    646 }
    647 
    648 define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
        ; Same loop shape as @unordered_atomic_ops, except that the first store
        ; is a plain store while the second is an unordered atomic store, so the
        ; two copies are not identical. The expected output still contains the
        ; original step-2 increment, the iv + 1 add and the slt exit compare,
        ; i.e. the loop is left in its two-copy form.
    649 ; Negative test
    650 
    651 ; CHECK-LABEL: @unordered_atomic_ops_nomatch(
    652 entry:
    653   br label %for.body
    654 
    655 for.body:
    656 ; CHECK: for.body:
    657 ; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
    658 ; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
    659 ; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
    660 ; CHECK:   br i1 %cmp, label %for.body, label %for.end
    661 
    662   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    663   %indvars.iv.next = add i32 %indvars.iv, 2
    664   %indvars.mid = add i32 %indvars.iv, 1
    665   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
    666   %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
    667   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
    668   %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
    669   %va = load atomic i32, i32* %buf0_a unordered, align 4
    670   %vb = load atomic i32, i32* %buf0_b unordered, align 4
    671   store i32 %va, i32* %buf1_a, align 4  ;; Not atomic
    672   store atomic i32 %vb, i32* %buf1_b unordered, align 4
    673   %cmp = icmp slt i32 %indvars.iv.next, 3200
    674   br i1 %cmp, label %for.body, label %for.end
    675 
    676 for.end:
    677   ret void
    678 }
    679 
    680 define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
        ; Same loop shape as @unordered_atomic_ops, but the loads use acquire
        ; ordering and the stores use release ordering. The expected output
        ; still contains the original step-2 increment, the iv + 1 add and the
        ; slt exit compare, i.e. the loop is left in its two-copy form.
    681 ; Negative test
    682 
    683 ; CHECK-LABEL: @ordered_atomic_ops(
    684 entry:
    685   br label %for.body
    686 
    687 for.body:
    688 ; CHECK: for.body:
    689 ; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
    690 ; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
    691 ; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
    692 ; CHECK:   br i1 %cmp, label %for.body, label %for.end
    693 
    694   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    695   %indvars.iv.next = add i32 %indvars.iv, 2
    696   %indvars.mid = add i32 %indvars.iv, 1
    697   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
    698   %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
    699   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
    700   %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
          ; Acquire loads and release stores, unlike the unordered variants.
    701   %va = load atomic i32, i32* %buf0_a acquire, align 4
    702   %vb = load atomic i32, i32* %buf0_b acquire, align 4
    703   store atomic i32 %va, i32* %buf1_a release, align 4
    704   store atomic i32 %vb, i32* %buf1_b release, align 4
    705   %cmp = icmp slt i32 %indvars.iv.next, 3200
    706   br i1 %cmp, label %for.body, label %for.end
    707 
    708 for.end:
    709   ret void
    710 }
    711 
    712 define void @unordered_atomic_ops_with_fence(i32* noalias %buf_0, i32* noalias %buf_1) {
        ; Same loop as @unordered_atomic_ops with a seq_cst fence placed between
        ; the two loads and the two stores. The expected output retains both
        ; loads, the fence and both stores, on consecutive lines and in the
        ; original order, i.e. the body keeps its two-copy form.
    713 ; CHECK-LABEL: @unordered_atomic_ops_with_fence(
    714 entry:
    715   br label %for.body
    716 
    717 for.body:
    718 ; CHECK: for.body:
    719 ; CHECK:  %va = load atomic i32, i32* %buf0_a unordered, align 4
    720 ; CHECK-NEXT:  %vb = load atomic i32, i32* %buf0_b unordered, align 4
    721 ; CHECK-NEXT:  fence seq_cst
    722 ; CHECK-NEXT:  store atomic i32 %va, i32* %buf1_a unordered, align 4
    723 ; CHECK-NEXT:  store atomic i32 %vb, i32* %buf1_b unordered, align 4
    724 
    725   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
    726   %indvars.iv.next = add i32 %indvars.iv, 2
    727   %indvars.mid = add i32 %indvars.iv, 1
    728   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
    729   %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
    730   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
    731   %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
    732   %va = load atomic i32, i32* %buf0_a unordered, align 4
    733   %vb = load atomic i32, i32* %buf0_b unordered, align 4
          ; The fence separates the loads from the stores.
    734   fence seq_cst
    735   store atomic i32 %va, i32* %buf1_a unordered, align 4
    736   store atomic i32 %vb, i32* %buf1_b unordered, align 4
    737   %cmp = icmp slt i32 %indvars.iv.next, 3200
    738   br i1 %cmp, label %for.body, label %for.end
    739 
    740 for.end:
    741   ret void
    742 }
    743 
    744 define void @pointer_bitcast_baseinst(i16* %arg, i8* %arg1, i64 %arg2) {
        ; Two copies per iteration of an <8 x i16> vector copy, at element
        ; indices %tmp and %tmp + 8. The source address is an i8 GEP on %arg1 at
        ; byte offset index << 1, the destination is an i16 GEP on %arg at the
        ; index itself, and both are bitcast to a vector pointer. The induction
        ; variable starts at 1, advances by 16, and the loop exits when the next
        ; value equals %arg2.
        ; The expected form counts %indvar from 0 by 1, rebuilds the element
        ; index as (%indvar << 3) + 1, and keeps a single load/store pair.
        ; NOTE(review): %3 in the expected exit compare is not defined by any
        ; matched line; presumably it is a trip-count value emitted outside the
        ; loop body - confirm against actual opt output.
    745 ; CHECK-LABEL: @pointer_bitcast_baseinst(
    746 ; CHECK:       bb3:
    747 ; CHECK-NEXT:    %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ]
    748 ; CHECK-NEXT:    %4 = shl i64 %indvar, 3
    749 ; CHECK-NEXT:    %5 = add i64 %4, 1
    750 ; CHECK-NEXT:    %tmp5 = shl nuw i64 %5, 1
    751 ; CHECK-NEXT:    %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
    752 ; CHECK-NEXT:    %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
    753 ; CHECK-NEXT:    %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
    754 ; CHECK-NEXT:    %tmp13 = getelementptr i16, i16* %arg, i64 %5
    755 ; CHECK-NEXT:    %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
    756 ; CHECK-NEXT:    store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
    757 ; CHECK-NEXT:    %indvar.next = add i64 %indvar, 1
    758 ; CHECK-NEXT:    %exitcond = icmp eq i64 %indvar, %3
    759 ; CHECK-NEXT:    br i1 %exitcond, label %bb19, label %bb3
    760 bb:
    761   br label %bb3
    762 
    763 bb3:                                              ; preds = %bb3, %bb
    764   %tmp = phi i64 [ 1, %bb ], [ %tmp17, %bb3 ]
          ; %tmp4 is the element index of the second copy.
    765   %tmp4 = add nuw i64 %tmp, 8
          ; Loads: byte offsets are the element indices shifted left by 1.
    766   %tmp5 = shl nuw i64 %tmp, 1
    767   %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5
    768   %tmp7 = bitcast i8* %tmp6 to <8 x i16>*
    769   %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2
    770   %tmp9 = shl i64 %tmp4, 1
    771   %tmp10 = getelementptr i8, i8* %arg1, i64 %tmp9
    772   %tmp11 = bitcast i8* %tmp10 to <8 x i16>*
    773   %tmp12 = load <8 x i16>, <8 x i16>* %tmp11, align 2
          ; Stores: addressed in i16 elements, then bitcast to a vector pointer.
    774   %tmp13 = getelementptr i16, i16* %arg, i64 %tmp
    775   %tmp14 = bitcast i16* %tmp13 to <8 x i16>*
    776   store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2
    777   %tmp15 = getelementptr i16, i16* %arg, i64 %tmp4
    778   %tmp16 = bitcast i16* %tmp15 to <8 x i16>*
    779   store <8 x i16> %tmp12, <8 x i16>* %tmp16, align 2
    780   %tmp17 = add nuw nsw i64 %tmp, 16
    781   %tmp18 = icmp eq i64 %tmp17, %arg2
    782   br i1 %tmp18, label %bb19, label %bb3
    783 
    784 bb19:                                             ; preds = %bb3
    785   ret void
    786 }
    787 
    788 attributes #0 = { nounwind uwtable }
    789 attributes #1 = { nounwind }
        ; Group #0 is referenced by the function definitions that carry "#0";
        ; group #1 is referenced by the calls to @foo.
    790 
    791