Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -mcpu=corei7 -O1 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1
      2 ; RUN: opt < %s -mcpu=corei7 -O2 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2
      3 ; RUN: opt < %s -mcpu=corei7 -O3 -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3
      4 ; RUN: opt < %s -mcpu=corei7 -Os -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os
      5 ; RUN: opt < %s -mcpu=corei7 -Oz -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz
      6 ; RUN: opt < %s -mcpu=corei7 -O1 -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC
      7 ; RUN: opt < %s -mcpu=corei7 -Oz -vectorize-loops -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC
      8 ; RUN: opt < %s -mcpu=corei7 -O1 -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2
      9 ; RUN: opt < %s -mcpu=corei7 -Oz -loop-vectorize -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2
     10 ; RUN: opt < %s -mcpu=corei7 -O3 -disable-loop-vectorization -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS
     11 
     12 ; This file tests that the llvm.loop.vectorize.enable metadata forces
     13 ; vectorization even when the optimization level is too low or when
     14 ; vectorization is disabled, and that setting it to false suppresses it.
     15 
     16 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" ; little-endian layout, 64-bit pointers, 128-bit vectors aligned to 128 bits
     17 target triple = "x86_64-unknown-linux-gnu" ; x86-64 Linux; the opt invocations at the top of the file additionally pass -mcpu=corei7
     18 
     19 ; O1-LABEL: @enabled(
     20 ; O1: store <4 x i32>
     21 ; O1: ret i32
     22 ; O2-LABEL: @enabled(
     23 ; O2: store <4 x i32>
     24 ; O2: ret i32
     25 ; O3-LABEL: @enabled(
     26 ; O3: store <4 x i32>
     27 ; O3: ret i32
     28 ; The pragma always wins, even when -disable-loop-vectorization is passed.
     29 ; O3DIS-LABEL: @enabled(
     30 ; O3DIS: store <4 x i32>
     31 ; O3DIS: ret i32
     32 ; Os-LABEL: @enabled(
     33 ; Os: store <4 x i32>
     34 ; Os: ret i32
     35 ; Oz-LABEL: @enabled(
     36 ; Oz: store <4 x i32>
     37 ; Oz: ret i32
     38 ; O1VEC-LABEL: @enabled(
     39 ; O1VEC: store <4 x i32>
     40 ; O1VEC: ret i32
     41 ; OzVEC-LABEL: @enabled(
     42 ; OzVEC: store <4 x i32>
     43 ; OzVEC: ret i32
     44 ; O1VEC2-LABEL: @enabled(
     45 ; O1VEC2: store <4 x i32>
     46 ; O1VEC2: ret i32
     47 ; OzVEC2-LABEL: @enabled(
     48 ; OzVEC2: store <4 x i32>
     49 ; OzVEC2: ret i32
     50 
     51 define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
     52 entry:                                            ; stores a[i] = b[i] + N for i = 0..63, then returns a[0]
     53   br label %loop
     54 
     55 loop:                                             ; preds = %loop, %entry
     56   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
     57   %src.addr = getelementptr inbounds i32, i32* %b, i64 %i
     58   %src.val = load i32, i32* %src.addr, align 4
     59   %sum = add nsw i32 %src.val, %N
     60   %dst.addr = getelementptr inbounds i32, i32* %a, i64 %i
     61   store i32 %sum, i32* %dst.addr, align 4
     62   %i.next = add nuw nsw i64 %i, 1
     63   %done = icmp eq i64 %i.next, 64
     64   br i1 %done, label %exit, label %loop, !llvm.loop !0 ; !0 carries llvm.loop.vectorize.enable = true
     65 
     66 exit:                                             ; preds = %loop
     67   %first = load i32, i32* %a, align 4
     68   ret i32 %first
     69 }
     70 
     71 ; O1-LABEL: @nopragma(
     72 ; O1-NOT: store <4 x i32>
     73 ; O1: ret i32
     74 ; O2-LABEL: @nopragma(
     75 ; O2: store <4 x i32>
     76 ; O2: ret i32
     77 ; O3-LABEL: @nopragma(
     78 ; O3: store <4 x i32>
     79 ; O3: ret i32
     80 ; O3DIS-LABEL: @nopragma(
     81 ; O3DIS-NOT: store <4 x i32>
     82 ; O3DIS: ret i32
     83 ; Os-LABEL: @nopragma(
     84 ; Os: store <4 x i32>
     85 ; Os: ret i32
     86 ; Oz-LABEL: @nopragma(
     87 ; Oz-NOT: store <4 x i32>
     88 ; Oz: ret i32
     89 ; O1VEC-LABEL: @nopragma(
     90 ; O1VEC: store <4 x i32>
     91 ; O1VEC: ret i32
     92 ; OzVEC-LABEL: @nopragma(
     93 ; OzVEC: store <4 x i32>
     94 ; OzVEC: ret i32
     95 ; O1VEC2-LABEL: @nopragma(
     96 ; O1VEC2: store <4 x i32>
     97 ; O1VEC2: ret i32
     98 ; OzVEC2-LABEL: @nopragma(
     99 ; OzVEC2: store <4 x i32>
    100 ; OzVEC2: ret i32
    101 
    102 define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
    103 entry:                                            ; stores a[i] = b[i] + N for i = 0..63, then returns a[0]
    104   br label %loop
    105 
    106 loop:                                             ; preds = %loop, %entry
    107   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    108   %src.addr = getelementptr inbounds i32, i32* %b, i64 %i
    109   %src.val = load i32, i32* %src.addr, align 4
    110   %sum = add nsw i32 %src.val, %N
    111   %dst.addr = getelementptr inbounds i32, i32* %a, i64 %i
    112   store i32 %sum, i32* %dst.addr, align 4
    113   %i.next = add nuw nsw i64 %i, 1
    114   %done = icmp eq i64 %i.next, 64
    115   br i1 %done, label %exit, label %loop           ; no loop metadata: the vectorizer gets no hint for this loop
    116 
    117 exit:                                             ; preds = %loop
    118   %first = load i32, i32* %a, align 4
    119   ret i32 %first
    120 }
    121 
    122 ; O1-LABEL: @disabled(
    123 ; O1-NOT: store <4 x i32>
    124 ; O1: ret i32
    125 ; O2-LABEL: @disabled(
    126 ; O2-NOT: store <4 x i32>
    127 ; O2: ret i32
    128 ; O3-LABEL: @disabled(
    129 ; O3-NOT: store <4 x i32>
    130 ; O3: ret i32
    131 ; O3DIS-LABEL: @disabled(
    132 ; O3DIS-NOT: store <4 x i32>
    133 ; O3DIS: ret i32
    134 ; Os-LABEL: @disabled(
    135 ; Os-NOT: store <4 x i32>
    136 ; Os: ret i32
    137 ; Oz-LABEL: @disabled(
    138 ; Oz-NOT: store <4 x i32>
    139 ; Oz: ret i32
    140 ; O1VEC-LABEL: @disabled(
    141 ; O1VEC-NOT: store <4 x i32>
    142 ; O1VEC: ret i32
    143 ; OzVEC-LABEL: @disabled(
    144 ; OzVEC-NOT: store <4 x i32>
    145 ; OzVEC: ret i32
    146 ; O1VEC2-LABEL: @disabled(
    147 ; O1VEC2-NOT: store <4 x i32>
    148 ; O1VEC2: ret i32
    149 ; OzVEC2-LABEL: @disabled(
    150 ; OzVEC2-NOT: store <4 x i32>
    151 ; OzVEC2: ret i32
    152 
    153 define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) {
    154 entry:                                            ; stores a[i] = b[i] + N for i = 0..63, then returns a[0]
    155   br label %loop
    156 
    157 loop:                                             ; preds = %loop, %entry
    158   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    159   %src.addr = getelementptr inbounds i32, i32* %b, i64 %i
    160   %src.val = load i32, i32* %src.addr, align 4
    161   %sum = add nsw i32 %src.val, %N
    162   %dst.addr = getelementptr inbounds i32, i32* %a, i64 %i
    163   store i32 %sum, i32* %dst.addr, align 4
    164   %i.next = add nuw nsw i64 %i, 1
    165   %done = icmp eq i64 %i.next, 64
    166   br i1 %done, label %exit, label %loop, !llvm.loop !2 ; !2 carries llvm.loop.vectorize.enable = false
    167 
    168 exit:                                             ; preds = %loop
    169   %first = load i32, i32* %a, align 4
    170   ret i32 %first
    171 }
    172 
    173 !0 = !{!0, !1}                                    ; loop ID used by @enabled; first operand refers back to the node itself
    174 !1 = !{!"llvm.loop.vectorize.enable", i1 1}       ; hint for the @enabled loop: vectorization requested
    175 !2 = !{!2, !3}                                    ; loop ID used by @disabled; first operand refers back to the node itself
    176 !3 = !{!"llvm.loop.vectorize.enable", i1 0}       ; hint for the @disabled loop: vectorization turned off
    177