; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -debug-only=loop-vectorize -stats -S -vectorizer-min-trip-count=21 2>&1 | FileCheck %s
; REQUIRES: asserts
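;
; Both loops below have a constant trip count of 20, which is below the
; threshold of 21 set by -vectorizer-min-trip-count, so only the loop whose
; metadata explicitly forces vectorization is expected to be vectorized.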

; CHECK: LV: Loop hints: force=enabled
; CHECK: LV: Loop hints: force=?
; No more loops in the module
; CHECK-NOT: LV: Loop hints: force=
; CHECK: 2 loop-vectorize               - Number of loops analyzed for vectorization
; CHECK: 1 loop-vectorize               - Number of loops vectorized
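;
; force=enabled is reported for @vectorized, whose loop metadata carries the
; llvm.loop.vectorize.enable hint; force=? is reported for @not_vectorized,
; which has no hint.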

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

;
; The source code for the test:
;
; void foo(float* restrict A, float* restrict B)
; {
;     for (int i = 0; i < 20; ++i) A[i] += B[i];
; }
;

;
; This loop will be vectorized even though the trip count is below the threshold, because vectorization is explicitly forced in metadata.
;
define void @vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
  %add = fadd fast float %0, %1
  store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 20
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:
  ret void
}

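; !1 is the loop's self-referential ID; !2 attaches llvm.loop.vectorize.enable
; set to true, which forces vectorization despite the small trip count.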
!1 = !{!1, !2}
!2 = !{!"llvm.loop.vectorize.enable", i1 true}

;
; This loop will not be vectorized: the trip count is below the threshold and no metadata forces vectorization.
;
define void @not_vectorized(float* noalias nocapture %A, float* noalias nocapture readonly %B) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
  %add = fadd fast float %0, %1
  store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 20
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:
  ret void
}

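; !3 is a bare, self-referential loop ID with no vectorization hint, so the
; default heuristics apply and the below-threshold trip count blocks vectorization.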
!3 = !{!3}