Home | History | Annotate | Download | only in LoopVectorize
      1 ; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"  ; little-endian ("e"), 64-bit pointers ("p:64:64:64"), 128-bit vectors aligned to 128 bits ("v128:128:128")
      4 target triple = "x86_64-apple-macosx10.9.0"  ; the test is run for an x86-64 macOS target
      5 
      6 ; This is the loop in this example:
      7 ;
      8 ;int function0(int *a, int *b, int start, int end) {
      9 ;
     10 ;  for (int i=start; i<end; ++i) {
     11 ;    unsigned k = a[i];
     12 ;
     13 ;    if (a[i] > b[i])   <------ notice the IF inside the loop.
     14 ;      k = k * 5 + 3;
     15 ;
     16 ;    a[i] = k;  <---- K is a phi node that becomes vector-select.
     17 ;  }
     18 ;}
     19 
     20 ;CHECK-LABEL: @function0(
     21 ;CHECK: load <4 x i32>
     22 ;CHECK: icmp sgt <4 x i32>
     23 ;CHECK: mul <4 x i32>
     24 ;CHECK: add <4 x i32>
     25 ;CHECK: select <4 x i1>
     26 ;CHECK: ret i32
     27 define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {  ; loop with an if inside: a[i] is rewritten to a[i]*5+3 when a[i] > b[i], for i in [start, end)
     28 entry:
     29   %cmp16 = icmp slt i32 %start, %end  ; guard: the loop is entered only when start < end (signed)
     30   br i1 %cmp16, label %for.body.lr.ph, label %for.end
     31 
     32 for.body.lr.ph:
     33   %0 = sext i32 %start to i64  ; initial induction value: start sign-extended to i64
     34   br label %for.body
     35 
     36 for.body:
     37   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]  ; i64 induction variable i
     38   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv  ; &a[i], reused by the store in if.end
     39   %1 = load i32* %arrayidx, align 4  ; k = a[i]
     40   %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv  ; &b[i]
     41   %2 = load i32* %arrayidx4, align 4  ; b[i]
     42   %cmp5 = icmp sgt i32 %1, %2  ; a[i] > b[i] (signed compare)
     43   br i1 %cmp5, label %if.then, label %if.end  ; the conditional branch that if-conversion has to flatten
     44 
     45 if.then:
     46   %mul = mul i32 %1, 5  ; k * 5
     47   %add = add i32 %mul, 3  ; k * 5 + 3
     48   br label %if.end
     49 
     50 if.end:
     51   %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]  ; merged k: updated value from if.then, otherwise the loaded a[i]
     52   store i32 %k.0, i32* %arrayidx, align 4  ; a[i] = k
     53   %indvars.iv.next = add i64 %indvars.iv, 1  ; ++i
     54   %3 = trunc i64 %indvars.iv.next to i32  ; narrow i back to i32 for the comparison against end
     55   %cmp = icmp slt i32 %3, %end  ; continue while i < end (signed)
     56   br i1 %cmp, label %for.body, label %for.end
     57 
     58 for.end:
     59   ret i32 undef  ; no meaningful return value is produced
     60 }
     61 
     62 
     63 
     64 ; int func(int *A, int n) {
     65 ;   unsigned sum = 0;
     66 ;   for (int i = 0; i < n; ++i)
     67 ;     if (A[i] > 30)
     68 ;       sum += A[i] + 2;
     69 ;
     70 ;   return sum;
     71 ; }
     72 
     73 ;CHECK-LABEL: @reduction_func(
     74 ;CHECK: load <4 x i32>
     75 ;CHECK: icmp sgt <4 x i32>
     76 ;CHECK: add <4 x i32>
     77 ;CHECK: select <4 x i1>
     78 ;CHECK: ret i32
     79 define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {  ; conditional sum reduction: adds A[i] + 2 to sum for every A[i] > 30, i in [0, n)
     80 entry:
     81   %cmp10 = icmp sgt i32 %n, 0  ; guard: the loop is entered only when n > 0
     82   br i1 %cmp10, label %for.body, label %for.end
     83 
     84 for.body:                                         ; preds = %entry, %for.inc
     85   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]  ; i64 induction variable i, starting at 0
     86   %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]  ; running sum carried around the loop, starting at 0
     87   %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv  ; &A[i]
     88   %0 = load i32* %arrayidx, align 4  ; A[i]
     89   %cmp1 = icmp sgt i32 %0, 30  ; A[i] > 30 (signed compare)
     90   br i1 %cmp1, label %if.then, label %for.inc  ; the sum is only updated on the taken path
     91 
     92 if.then:                                          ; preds = %for.body
     93   %add = add i32 %sum.011, 2  ; sum + 2
     94   %add4 = add i32 %add, %0  ; sum + 2 + A[i]
     95   br label %for.inc
     96 
     97 for.inc:                                          ; preds = %for.body, %if.then
     98   %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]  ; updated sum from if.then, otherwise the unchanged sum
     99   %indvars.iv.next = add i64 %indvars.iv, 1  ; ++i
    100   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow i to i32 for the exit test
    101   %exitcond = icmp eq i32 %lftr.wideiv, %n  ; leave the loop once i == n
    102   br i1 %exitcond, label %for.end, label %for.body
    103 
    104 for.end:                                          ; preds = %for.inc, %entry
    105   %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]  ; 0 when the loop was skipped, otherwise the final sum
    106   ret i32 %sum.0.lcssa
    107 }
    108 
    109 @a = common global [1 x i32*] zeroinitializer, align 8  ; one-element array of i32*; element addresses of @a appear in the constant expressions of the tests below
    110 @c = common global i32* null, align 8  ; its address (i32** @c) is what those constant expressions compare against
    111 
    112 ; We used to if-convert this loop. This is not safe because there is a trapping
    113 ; constant expression.
    114 ; PR16729
    115 
    116 ; CHECK-LABEL: trapping_constant_expression
    117 ; CHECK-NOT: or <4 x i32>
    118 
    119 define i32 @trapping_constant_expression() {  ; PR16729 case 1: the trapping constant expression is a phi incoming value
    120 entry:
    121   br label %for.body
    122 
    123 for.body:
    124   %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]  ; loop counter, starting at 0
    125   %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]  ; running OR accumulator, starting at 0
    126   br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end  ; branch on a constant-expression address compare; cond.false is reached only when it is true
    127 
    128 cond.false:
    129   br label %cond.end  ; empty block: the guarded value exists only as the phi operand in cond.end
    130 
    131 cond.end:
    132   %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]  ; the sdiv divisor is a zext'd i1 (0 or 1), so the expression divides by zero when the compare is false
    133   %or = or i32 %or2, %cond  ; the scalar OR that the CHECK-NOT above forbids from appearing as "or <4 x i32>"
    134   %inc = add nsw i32 %inc3, 1  ; ++counter
    135   %cmp = icmp slt i32 %inc, 128  ; loop while counter < 128
    136   br i1 %cmp, label %for.body, label %for.end
    137 
    138 for.end:
    139   ret i32 %or  ; returns the accumulated OR
    140 }
    141 
    142 ; Neither should we if-convert if there is an instruction operand that is a
    143 ; trapping constant expression.
    144 ; PR16729
    145 
    146 ; CHECK-LABEL: trapping_constant_expression2
    147 ; CHECK-NOT: or <4 x i32>
    148 
    149 define i32 @trapping_constant_expression2() {  ; PR16729 case 2: the trapping constant expression is an operand of an instruction in the guarded block
    150 entry:
    151   br label %for.body
    152 
    153 for.body:
    154   %inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]  ; loop counter, starting at 0
    155   %or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]  ; running OR accumulator, starting at 0
    156   br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end  ; branch on a constant-expression address compare (element index 0 of @a)
    157 
    158 cond.false:
    159   %cond.1 = or i32 %inc3, sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32))  ; the sdiv divisor is a zext'd i1 (0 or 1), so it can divide by zero; note this compare uses element index 1, unlike the branch condition
    160   br label %cond.end
    161 
    162 cond.end:
    163   %cond = phi i32 [ %cond.1, %cond.false ], [ %inc3, %for.body ]  ; guarded value from cond.false, otherwise the plain counter
    164   %or = or i32 %or2, %cond  ; the scalar OR that the CHECK-NOT above forbids from appearing as "or <4 x i32>"
    165   %inc = add nsw i32 %inc3, 1  ; ++counter
    166   %cmp = icmp slt i32 %inc, 128  ; loop while counter < 128
    167   br i1 %cmp, label %for.body, label %for.end
    168 
    169 for.end:
    170   ret i32 %or  ; returns the accumulated OR
    171 }
    172