Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; little-endian (e), 64-bit pointers (p:64:64:64), native integer widths 8/16/32/64 (n8:16:32:64)
      4 target triple = "x86_64-apple-macosx10.8.0" ; x86-64 macOS 10.8 target
      5 
      6 ;CHECK-LABEL: @reduction_sum(
      7 ;CHECK: phi <4 x i32>
      8 ;CHECK: load <4 x i32>
      9 ;CHECK: add <4 x i32>
     10 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
     11 ;CHECK: add <4 x i32>
     12 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
     13 ;CHECK: add <4 x i32>
     14 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
     15 ;CHECK: ret i32
     16 define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { ; integer add reduction: sum += i + A[i] + B[i] for i in [0, n)
     17   %1 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
     18   br i1 %1, label %.lr.ph, label %._crit_edge
     19 
     20 .lr.ph:                                           ; preds = %0, %.lr.ph
     21   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; i64 induction variable i, starting at 0
     22   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] ; reduction accumulator, starting at 0
     23   %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
     24   %3 = load i32* %2, align 4 ; A[i]
     25   %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
     26   %5 = load i32* %4, align 4 ; B[i]
     27   %6 = trunc i64 %indvars.iv to i32 ; i narrowed to i32 so it can be added to the sum
     28   %7 = add i32 %sum.02, %6 ; sum + i
     29   %8 = add i32 %7, %3 ; ... + A[i]
     30   %9 = add i32 %8, %5 ; ... + B[i]; value carried into the next iteration
     31   %indvars.iv.next = add i64 %indvars.iv, 1
     32   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     33   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
     34   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     35 
     36 ._crit_edge:                                      ; preds = %.lr.ph, %0
     37   %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] ; 0 if the loop was skipped, otherwise the final sum
     38   ret i32 %sum.0.lcssa
     39 }
     40 
     41 ;CHECK-LABEL: @reduction_prod(
     42 ;CHECK: phi <4 x i32>
     43 ;CHECK: load <4 x i32>
     44 ;CHECK: mul <4 x i32>
     45 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
     46 ;CHECK: mul <4 x i32>
     47 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
     48 ;CHECK: mul <4 x i32>
     49 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
     50 ;CHECK: ret i32
     51 define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { ; integer mul reduction: prod *= i * A[i] * B[i] for i in [0, n)
     52   %1 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
     53   br i1 %1, label %.lr.ph, label %._crit_edge
     54 
     55 .lr.ph:                                           ; preds = %0, %.lr.ph
     56   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; i64 induction variable i, starting at 0
     57   %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ] ; reduction accumulator, starting at 1
     58   %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
     59   %3 = load i32* %2, align 4 ; A[i]
     60   %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
     61   %5 = load i32* %4, align 4 ; B[i]
     62   %6 = trunc i64 %indvars.iv to i32 ; i narrowed to i32 so it can be multiplied in
     63   %7 = mul i32 %prod.02, %6 ; prod * i
     64   %8 = mul i32 %7, %3 ; ... * A[i]
     65   %9 = mul i32 %8, %5 ; ... * B[i]; value carried into the next iteration
     66   %indvars.iv.next = add i64 %indvars.iv, 1
     67   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     68   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
     69   br i1 %exitcond, label %._crit_edge, label %.lr.ph
     70 
     71 ._crit_edge:                                      ; preds = %.lr.ph, %0
     72   %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ] ; 1 if the loop was skipped, otherwise the final product
     73   ret i32 %prod.0.lcssa
     74 }
     75 
     76 ;CHECK-LABEL: @reduction_mix(
     77 ;CHECK: phi <4 x i32>
     78 ;CHECK: load <4 x i32>
     79 ;CHECK: mul nsw <4 x i32>
     80 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
     81 ;CHECK: add <4 x i32>
     82 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
     83 ;CHECK: add <4 x i32>
     84 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
     85 ;CHECK: ret i32
     86 define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { ; add reduction fed by a multiply: sum += i + A[i] * B[i] for i in [0, n)
     87   %1 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
     88   br i1 %1, label %.lr.ph, label %._crit_edge
     89 
     90 .lr.ph:                                           ; preds = %0, %.lr.ph
     91   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; i64 induction variable i, starting at 0
     92   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] ; reduction accumulator, starting at 0
     93   %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
     94   %3 = load i32* %2, align 4 ; A[i]
     95   %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
     96   %5 = load i32* %4, align 4 ; B[i]
     97   %6 = mul nsw i32 %5, %3 ; B[i] * A[i]; not part of the reduction chain itself
     98   %7 = trunc i64 %indvars.iv to i32 ; i narrowed to i32
     99   %8 = add i32 %sum.02, %7 ; sum + i
    100   %9 = add i32 %8, %6 ; ... + A[i] * B[i]; value carried into the next iteration
    101   %indvars.iv.next = add i64 %indvars.iv, 1
    102   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    103   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    104   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    105 
    106 ._crit_edge:                                      ; preds = %.lr.ph, %0
    107   %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] ; 0 if the loop was skipped, otherwise the final sum
    108   ret i32 %sum.0.lcssa
    109 }
    110 
    111 ;CHECK-LABEL: @reduction_mul(
    112 ;CHECK: mul <4 x i32>
    113 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    114 ;CHECK: mul <4 x i32>
    115 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    116 ;CHECK: mul <4 x i32>
    117 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
    118 ;CHECK: ret i32
    119 define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { ; mul reduction with the accumulator as the second operand: sum = (A[i] + i + B[i]) * sum
    120   %1 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    121   br i1 %1, label %.lr.ph, label %._crit_edge
    122 
    123 .lr.ph:                                           ; preds = %0, %.lr.ph
    124   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] ; i64 induction variable i, starting at 0
    125   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ] ; reduction accumulator, starting at the non-identity value 19
    126   %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
    127   %3 = load i32* %2, align 4 ; A[i]
    128   %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
    129   %5 = load i32* %4, align 4 ; B[i]
    130   %6 = trunc i64 %indvars.iv to i32 ; i narrowed to i32
    131   %7 = add i32 %3, %6 ; A[i] + i
    132   %8 = add i32 %7, %5 ; ... + B[i]
    133   %9 = mul i32 %8, %sum.02 ; multiply into the accumulator (accumulator is the right-hand operand here)
    134   %indvars.iv.next = add i64 %indvars.iv, 1
    135   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    136   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    137   br i1 %exitcond, label %._crit_edge, label %.lr.ph
    138 
    139 ._crit_edge:                                      ; preds = %.lr.ph, %0
    140   %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] ; NOTE(review): yields 0 on the skipped-loop path although the accumulator starts at 19 - confirm this is intentional
    141   ret i32 %sum.0.lcssa
    142 }
    143 
    144 ;CHECK-LABEL: @start_at_non_zero(
    145 ;CHECK: phi <4 x i32>
    146 ;CHECK: <i32 120, i32 0, i32 0, i32 0>
    147 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    148 ;CHECK: add <4 x i32>
    149 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    150 ;CHECK: add <4 x i32>
    151 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
    152 ;CHECK: ret i32
    153 define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp { ; add reduction seeded with 120: sum += in[i] * coeff[i]; %out is not used in the body
    154 entry:
    155   %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    156   br i1 %cmp7, label %for.body, label %for.end
    157 
    158 for.body:                                         ; preds = %entry, %for.body
    159   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    160   %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ] ; reduction accumulator, starting at 120 rather than the add identity
    161   %arrayidx = getelementptr inbounds i32* %in, i64 %indvars.iv
    162   %0 = load i32* %arrayidx, align 4 ; in[i]
    163   %arrayidx2 = getelementptr inbounds i32* %coeff, i64 %indvars.iv
    164   %1 = load i32* %arrayidx2, align 4 ; coeff[i]
    165   %mul = mul nsw i32 %1, %0 ; coeff[i] * in[i]
    166   %add = add nsw i32 %mul, %sum.09 ; accumulate; value carried into the next iteration
    167   %indvars.iv.next = add i64 %indvars.iv, 1
    168   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    169   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    170   br i1 %exitcond, label %for.end, label %for.body
    171 
    172 for.end:                                          ; preds = %for.body, %entry
    173   %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ] ; 120 if the loop was skipped, otherwise the final sum
    174   ret i32 %sum.0.lcssa
    175 }
    176 
    177 ;CHECK-LABEL: @reduction_and(
    178 ;CHECK: and <4 x i32>
    179 ;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
    180 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    181 ;CHECK: and <4 x i32>
    182 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    183 ;CHECK: and <4 x i32>
    184 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
    185 ;CHECK: ret i32
    186 define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { ; bitwise AND reduction: result &= (A[i] + B[i])
    187 entry:
    188   %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    189   br i1 %cmp7, label %for.body, label %for.end
    190 
    191 for.body:                                         ; preds = %entry, %for.body
    192   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    193   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] ; reduction accumulator, starting at -1 (all bits set)
    194   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
    195   %0 = load i32* %arrayidx, align 4 ; A[i]
    196   %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
    197   %1 = load i32* %arrayidx2, align 4 ; B[i]
    198   %add = add nsw i32 %1, %0 ; B[i] + A[i]
    199   %and = and i32 %add, %result.08 ; fold into the accumulator; value carried into the next iteration
    200   %indvars.iv.next = add i64 %indvars.iv, 1
    201   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    202   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    203   br i1 %exitcond, label %for.end, label %for.body
    204 
    205 for.end:                                          ; preds = %for.body, %entry
    206   %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ] ; -1 if the loop was skipped, otherwise the final result
    207   ret i32 %result.0.lcssa
    208 }
    209 
    210 ;CHECK-LABEL: @reduction_or(
    211 ;CHECK: or <4 x i32>
    212 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    213 ;CHECK: or <4 x i32>
    214 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    215 ;CHECK: or <4 x i32>
    216 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
    217 ;CHECK: ret i32
    218 define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { ; bitwise OR reduction: result |= (A[i] + B[i])
    219 entry:
    220   %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    221   br i1 %cmp7, label %for.body, label %for.end
    222 
    223 for.body:                                         ; preds = %entry, %for.body
    224   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    225   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] ; reduction accumulator, starting at 0
    226   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
    227   %0 = load i32* %arrayidx, align 4 ; A[i]
    228   %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
    229   %1 = load i32* %arrayidx2, align 4 ; B[i]
    230   %add = add nsw i32 %1, %0 ; B[i] + A[i]
    231   %or = or i32 %add, %result.08 ; fold into the accumulator; value carried into the next iteration
    232   %indvars.iv.next = add i64 %indvars.iv, 1
    233   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    234   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    235   br i1 %exitcond, label %for.end, label %for.body
    236 
    237 for.end:                                          ; preds = %for.body, %entry
    238   %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ] ; 0 if the loop was skipped, otherwise the final result
    239   ret i32 %result.0.lcssa
    240 }
    241 
    242 ;CHECK-LABEL: @reduction_xor(
    243 ;CHECK: xor <4 x i32>
    244 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
    245 ;CHECK: xor <4 x i32>
    246 ;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
    247 ;CHECK: xor <4 x i32>
    248 ;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
    249 ;CHECK: ret i32
    250 define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { ; bitwise XOR reduction: result ^= (A[i] + B[i])
    251 entry:
    252   %cmp7 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    253   br i1 %cmp7, label %for.body, label %for.end
    254 
    255 for.body:                                         ; preds = %entry, %for.body
    256   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    257   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] ; reduction accumulator, starting at 0
    258   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
    259   %0 = load i32* %arrayidx, align 4 ; A[i]
    260   %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
    261   %1 = load i32* %arrayidx2, align 4 ; B[i]
    262   %add = add nsw i32 %1, %0 ; B[i] + A[i]
    263   %xor = xor i32 %add, %result.08 ; fold into the accumulator; value carried into the next iteration
    264   %indvars.iv.next = add i64 %indvars.iv, 1
    265   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    266   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    267   br i1 %exitcond, label %for.end, label %for.body
    268 
    269 for.end:                                          ; preds = %for.body, %entry
    270   %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ] ; 0 if the loop was skipped, otherwise the final result
    271   ret i32 %result.0.lcssa
    272 }
    273 
    274 ; In this code the reduction variable is the right-hand operand of the subtraction (x = A[i] - x), which is not a valid reduction, so the loop must not be vectorized.
    275 ;CHECK-LABEL: @reduction_sub_rhs(
    276 ;CHECK-NOT: phi <4 x i32>
    277 ;CHECK-NOT: sub nsw <4 x i32>
    278 ;CHECK: ret i32
    279 define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { ; x = A[i] - x: the carried value is the subtrahend (right-hand operand)
    280 entry:
    281   %cmp4 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    282   br i1 %cmp4, label %for.body, label %for.end
    283 
    284 for.body:                                         ; preds = %entry, %for.body
    285   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    286   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] ; carried value x, starting at 0
    287   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
    288   %0 = load i32* %arrayidx, align 4 ; A[i]
    289   %sub = sub nsw i32 %0, %x.05 ; A[i] - x; x sits on the right-hand side of the subtraction
    290   %indvars.iv.next = add i64 %indvars.iv, 1
    291   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    292   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    293   br i1 %exitcond, label %for.end, label %for.body
    294 
    295 for.end:                                          ; preds = %for.body, %entry
    296   %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] ; 0 if the loop was skipped, otherwise the final x
    297   ret i32 %x.0.lcssa
    298 }
    299 
    300 
    301 ; In this test the reduction variable is on the LHS and we can vectorize it.
    302 ;CHECK-LABEL: @reduction_sub_lhs(
    303 ;CHECK: phi <4 x i32>
    304 ;CHECK: sub nsw <4 x i32>
    305 ;CHECK: ret i32
    306 define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { ; x = x - A[i]: the carried value is the minuend (left-hand operand)
    307 entry:
    308   %cmp4 = icmp sgt i32 %n, 0 ; guard: the loop is skipped when n <= 0
    309   br i1 %cmp4, label %for.body, label %for.end
    310 
    311 for.body:                                         ; preds = %entry, %for.body
    312   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] ; i64 induction variable i, starting at 0
    313   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] ; reduction accumulator x, starting at 0
    314   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
    315   %0 = load i32* %arrayidx, align 4 ; A[i]
    316   %sub = sub nsw i32 %x.05, %0 ; x - A[i]; x sits on the left-hand side of the subtraction
    317   %indvars.iv.next = add i64 %indvars.iv, 1
    318   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    319   %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once trunc(i + 1) == n
    320   br i1 %exitcond, label %for.end, label %for.body
    321 
    322 for.end:                                          ; preds = %for.body, %entry
    323   %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] ; 0 if the loop was skipped, otherwise the final x
    324   ret i32 %x.0.lcssa
    325 }
    326 
    327 ; We can vectorize conditional reductions with multi-input phis.
    328 ; CHECK-LABEL: @reduction_conditional(
    329 ; CHECK: fadd <4 x float>
    330 
    331 define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; conditional fast-math fadd reduction over 128 iterations; %C is not used in the body
    332 entry:
    333   br label %for.body
    334 
    335 for.body:
    336   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; i64 induction variable i, starting at 0
    337   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] ; reduction accumulator, seeded with the argument %S
    338   %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
    339   %0 = load float* %arrayidx, align 4 ; A[i]
    340   %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv
    341   %1 = load float* %arrayidx2, align 4 ; B[i]
    342   %cmp3 = fcmp ogt float %0, %1 ; A[i] > B[i]; otherwise the sum is left unchanged
    343   br i1 %cmp3, label %if.then, label %for.inc
    344 
    345 if.then:
    346   %cmp6 = fcmp ogt float %1, 1.000000e+00 ; B[i] > 1.0
    347   br i1 %cmp6, label %if.then8, label %if.else
    348 
    349 if.then8:
    350   %add = fadd fast float %sum.033, %0 ; sum + A[i]
    351   br label %for.inc
    352 
    353 if.else:
    354   %cmp14 = fcmp ogt float %0, 2.000000e+00 ; A[i] > 2.0; otherwise the sum is left unchanged
    355   br i1 %cmp14, label %if.then16, label %for.inc
    356 
    357 if.then16:
    358   %add19 = fadd fast float %sum.033, %1 ; sum + B[i]
    359   br label %for.inc
    360 
    361 for.inc:
    362   %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ] ; multi-input phi: every incoming value is an fadd of the accumulator or the accumulator itself
    363   %indvars.iv.next = add i64 %indvars.iv, 1
    364   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    365   %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until trunc(i + 1) == 128
    366   br i1 %exitcond, label %for.body, label %for.end
    367 
    368 for.end:
    369   %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
    370   ret float %sum.1.lcssa
    371 }
    372 
    373 ; We can't vectorize reductions with phi inputs from outside the reduction.
    374 ; CHECK-LABEL: @noreduction_phi(
    375 ; CHECK-NOT: fadd <4 x float>
    376 define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) { ; same shape as a conditional fadd reduction, but one phi input is a constant; %C is not used in the body
    377 entry:
    378   br label %for.body
    379 
    380 for.body:
    381   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] ; i64 induction variable i, starting at 0
    382   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] ; carried value, seeded with the argument %S
    383   %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
    384   %0 = load float* %arrayidx, align 4 ; A[i]
    385   %arrayidx2 = getelementptr inbounds float* %B, i64 %indvars.iv
    386   %1 = load float* %arrayidx2, align 4 ; B[i]
    387   %cmp3 = fcmp ogt float %0, %1 ; A[i] > B[i]; otherwise the sum is left unchanged
    388   br i1 %cmp3, label %if.then, label %for.inc
    389 
    390 if.then:
    391   %cmp6 = fcmp ogt float %1, 1.000000e+00 ; B[i] > 1.0
    392   br i1 %cmp6, label %if.then8, label %if.else
    393 
    394 if.then8:
    395   %add = fadd fast float %sum.033, %0 ; sum + A[i]
    396   br label %for.inc
    397 
    398 if.else:
    399   %cmp14 = fcmp ogt float %0, 2.000000e+00 ; A[i] > 2.0
    400   br i1 %cmp14, label %if.then16, label %for.inc
    401 
    402 if.then16:
    403   %add19 = fadd fast float %sum.033, %1 ; sum + B[i]
    404   br label %for.inc
    405 
    406 for.inc:
    407   %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 0.000000e+00, %if.else ], [ %sum.033, %for.body ] ; the constant 0.0 from %if.else is not derived from the accumulator - the input from outside the reduction
    408   %indvars.iv.next = add i64 %indvars.iv, 1
    409   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    410   %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until trunc(i + 1) == 128
    411   br i1 %exitcond, label %for.body, label %for.end
    412 
    413 for.end:
    414   %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
    415   ret float %sum.1.lcssa
    416 }
    417 
    418 ; We can't vectorize reductions that feed another header PHI.
    419 ; CHECK-LABEL: @noredux_header_phi(
    420 ; CHECK-NOT: fadd <4 x float>
    421 
    422 define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S)  { ; two chained fadd accumulators over 128 iterations; only %B is loaded from
    423 entry:
    424   br label %for.body
    425 
    426 for.body:
    427   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; i64 induction variable i, starting at 0
    428   %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ] ; second accumulator, starting at 0.0
    429   %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ] ; first accumulator, seeded with the argument %S
    430   %arrayidx = getelementptr inbounds float* %B, i64 %indvars.iv
    431   %0 = load float* %arrayidx, align 4 ; B[i]
    432   %add = fadd fast float %sum.08, %0 ; sum + B[i]
    433   %add1 = fadd fast float %sum2.09, %add ; sum2 + sum: the first accumulator's value feeds the chain of the other header phi
    434   %indvars.iv.next = add i64 %indvars.iv, 1
    435   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    436   %exitcond = icmp ne i32 %lftr.wideiv, 128 ; keep looping until trunc(i + 1) == 128
    437   br i1 %exitcond, label %for.body, label %for.end
    438 
    439 for.end:
    440   %add1.lcssa = phi float [ %add1, %for.body ]
    441   %add.lcssa = phi float [ %add, %for.body ]
    442   %add2 = fadd fast float %add.lcssa, %add1.lcssa ; both final accumulator values are combined after the loop
    443   ret float %add2
    444 }
    445 
    446 
    447 ; When vectorizing a reduction whose loop header phi value is used outside the
    448 ; loop special care must be taken. Otherwise, the reduced value feeding into the
    449 ; outside user misses a few iterations (VF-1) of the loop.
    450 ; PR16522
    451 
    452 ; CHECK-LABEL: @phivalueredux(
    453 ; CHECK-NOT: x i32>
    454 
    455 define i32 @phivalueredux(i32 %p) { ; xor chain whose header phi value, not the updated value, is returned after the loop
    456 entry:
    457   br label %for.body
    458 
    459 for.body:
    460   %t.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] ; loop counter, starting at 0
    461   %p.addr.02 = phi i32 [ %p, %entry ], [ %xor, %for.body ] ; carried value, seeded with %p
    462   %xor = xor i32 %p.addr.02, -1 ; bitwise NOT of the carried value
    463   %inc = add nsw i32 %t.03, 1
    464   %exitcond = icmp eq i32 %inc, 16 ; leave the loop after 16 iterations
    465   br i1 %exitcond, label %for.end, label %for.body
    466 
    467 for.end:
    468   ret i32 %p.addr.02 ; uses the header phi outside the loop, i.e. the value from before the last xor
    469 }
    470 
    471 ; Don't vectorize a reduction value that is not the last in a reduction cycle. We
    472 ; would lose iterations (VF-1) on the operations after that use.
    473 ; PR17498
    474 
    475 ; CHECK-LABEL: @not_last_operation(
    476 ; CHECK-NOT: x i32>
    477 define i32 @not_last_operation(i32 %p, i32 %val) { ; add chain whose intermediate value, not the last one in the cycle, is used after the loop
    478 entry:
    479   %tobool = icmp eq i32 %p, 0 ; true when p == 0
    480   br label %for.body
    481 
    482 for.body:
    483   %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ] ; loop counter, starting at 0
    484   %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ] ; carried value, seeded with %val
    485   %0 = zext i1 %tobool to i32
    486   %inc4.1 = xor i32 %0, 1 ; 1 when p != 0, 0 when p == 0
    487   %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1 ; intermediate value of the chain; also used in %exit
    488   %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1 ; last operation of the cycle; value carried into the next iteration
    489   %inc6.1 = add nsw i32 %inc613.1, 1
    490   %exitcond.1 = icmp eq i32 %inc6.1, 22 ; leave the loop after 22 iterations
    491   br i1 %exitcond.1, label %exit, label %for.body
    492 
    493 exit:
    494   %inc.2 = add nsw i32 %inc511.1.inc4.1, 2 ; outside use of the intermediate value rather than of %inc5.1
    495   ret i32 %inc.2
    496 }
    497