Home | History | Annotate | Download | only in X86
      1 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      2 target triple = "x86_64-apple-macosx10.8.0"
      3 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck %s
      4 ; SLP vectorizer test for "tiny" trees (a group of stores fed directly by loads, splats or constants).
      5 ; Fully consecutive groups are expected to become vector ops; groups with a non-consecutive load must stay scalar.
      6 ; CHECK-LABEL: @tiny_tree_fully_vectorizable(
      7 ; CHECK: load <2 x double>
      8 ; CHECK: store <2 x double>
      9 ; CHECK: ret
     10 ; Each iteration copies src[0..1] to dst[0..1]; both lanes are adjacent, so one <2 x double> load/store pair is expected.
     11 define void @tiny_tree_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
     12 entry:
     13   %is.empty = icmp eq i64 %count, 0                ; skip the loop entirely when count == 0
     14   br i1 %is.empty, label %exit, label %loop
     15 
     16 loop:                                             ; preds = %entry, %loop
     17   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
     18   %dst.cur = phi double* [ %dst.next, %loop ], [ %dst, %entry ]
     19   %src.cur = phi double* [ %src.next, %loop ], [ %src, %entry ]
     20   %lane0 = load double, double* %src.cur, align 8
     21   store double %lane0, double* %dst.cur, align 8   ; dst[0] = src[0]
     22   %src.lane1 = getelementptr inbounds double, double* %src.cur, i64 1
     23   %lane1 = load double, double* %src.lane1, align 8
     24   %dst.lane1 = getelementptr inbounds double, double* %dst.cur, i64 1
     25   store double %lane1, double* %dst.lane1, align 8 ; dst[1] = src[1]
     26   %src.next = getelementptr inbounds double, double* %src.cur, i64 %iv ; both cursors advance by the current counter value
     27   %dst.next = getelementptr inbounds double, double* %dst.cur, i64 %iv
     28   %iv.next = add i64 %iv, 1
     29   %done = icmp eq i64 %iv.next, %count
     30   br i1 %done, label %exit, label %loop
     31 
     32 exit:                                             ; preds = %loop, %entry
     33   ret void
     34 }
     35 
     36 ; CHECK-LABEL: @tiny_tree_fully_vectorizable2(
     37 ; CHECK: load <4 x float>
     38 ; CHECK: store <4 x float>
     39 ; CHECK: ret
     40 ; Each iteration copies src[0..3] to dst[0..3]; all four lanes are adjacent, so one <4 x float> load/store pair is expected.
     41 define void @tiny_tree_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
     42 entry:
     43   %is.empty = icmp eq i64 %count, 0                ; skip the loop entirely when count == 0
     44   br i1 %is.empty, label %exit, label %loop
     45 
     46 loop:                                             ; preds = %entry, %loop
     47   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
     48   %dst.cur = phi float* [ %dst.next, %loop ], [ %dst, %entry ]
     49   %src.cur = phi float* [ %src.next, %loop ], [ %src, %entry ]
     50   %lane0 = load float, float* %src.cur, align 4
     51   store float %lane0, float* %dst.cur, align 4     ; dst[0] = src[0]
     52   %src.lane1 = getelementptr inbounds float, float* %src.cur, i64 1
     53   %lane1 = load float, float* %src.lane1, align 4
     54   %dst.lane1 = getelementptr inbounds float, float* %dst.cur, i64 1
     55   store float %lane1, float* %dst.lane1, align 4   ; dst[1] = src[1]
     56   %src.lane2 = getelementptr inbounds float, float* %src.cur, i64 2
     57   %lane2 = load float, float* %src.lane2, align 4
     58   %dst.lane2 = getelementptr inbounds float, float* %dst.cur, i64 2
     59   store float %lane2, float* %dst.lane2, align 4   ; dst[2] = src[2]
     60   %src.lane3 = getelementptr inbounds float, float* %src.cur, i64 3
     61   %lane3 = load float, float* %src.lane3, align 4
     62   %dst.lane3 = getelementptr inbounds float, float* %dst.cur, i64 3
     63   store float %lane3, float* %dst.lane3, align 4   ; dst[3] = src[3]
     64   %src.next = getelementptr inbounds float, float* %src.cur, i64 %iv ; both cursors advance by the current counter value
     65   %dst.next = getelementptr inbounds float, float* %dst.cur, i64 %iv
     66   %iv.next = add i64 %iv, 1
     67   %done = icmp eq i64 %iv.next, %count
     68   br i1 %done, label %exit, label %loop
     69 
     70 exit:                                             ; preds = %loop, %entry
     71   ret void
     72 }
     73 
     74 ; A tiny tree that is not fully vectorizable must not be vectorized.
     75 ; CHECK-LABEL: @tiny_tree_not_fully_vectorizable(
     76 ; CHECK-NOT: <2 x double>
     77 ; CHECK: ret
     78 ; The two stores hit dst[0] and dst[1], but the loads read src[0] and src[2], so the load lanes are not consecutive.
     79 define void @tiny_tree_not_fully_vectorizable(double* noalias nocapture %dst, double* noalias nocapture readonly %src, i64 %count) #0 {
     80 entry:
     81   %is.empty = icmp eq i64 %count, 0                ; skip the loop entirely when count == 0
     82   br i1 %is.empty, label %exit, label %loop
     83 
     84 loop:                                             ; preds = %entry, %loop
     85   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
     86   %dst.cur = phi double* [ %dst.next, %loop ], [ %dst, %entry ]
     87   %src.cur = phi double* [ %src.next, %loop ], [ %src, %entry ]
     88   %lane0 = load double, double* %src.cur, align 8
     89   store double %lane0, double* %dst.cur, align 8   ; dst[0] = src[0]
     90   %src.gap = getelementptr inbounds double, double* %src.cur, i64 2 ; index 2, not 1: breaks the consecutive load pair
     91   %lane1 = load double, double* %src.gap, align 8
     92   %dst.lane1 = getelementptr inbounds double, double* %dst.cur, i64 1
     93   store double %lane1, double* %dst.lane1, align 8 ; dst[1] = src[2]
     94   %src.next = getelementptr inbounds double, double* %src.cur, i64 %iv ; both cursors advance by the current counter value
     95   %dst.next = getelementptr inbounds double, double* %dst.cur, i64 %iv
     96   %iv.next = add i64 %iv, 1
     97   %done = icmp eq i64 %iv.next, %count
     98   br i1 %done, label %exit, label %loop
     99 
    100 exit:                                             ; preds = %loop, %entry
    101   ret void
    102 }
    103 
    104 
    105 ; CHECK-LABEL: @tiny_tree_not_fully_vectorizable2(
    106 ; CHECK-NOT: <4 x float>
    107 ; CHECK: ret
    108 ; Stores hit dst[0..3], but the loads read src[0], src[4], src[2], src[3]; this function uses float, so the forbidden type is the float vector.
    109 define void @tiny_tree_not_fully_vectorizable2(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %count) #0 {
    110 entry:
    111   %is.empty = icmp eq i64 %count, 0                ; skip the loop entirely when count == 0
    112   br i1 %is.empty, label %exit, label %loop
    113 
    114 loop:                                             ; preds = %entry, %loop
    115   %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
    116   %dst.cur = phi float* [ %dst.next, %loop ], [ %dst, %entry ]
    117   %src.cur = phi float* [ %src.next, %loop ], [ %src, %entry ]
    118   %lane0 = load float, float* %src.cur, align 4
    119   store float %lane0, float* %dst.cur, align 4     ; dst[0] = src[0]
    120   %src.gap = getelementptr inbounds float, float* %src.cur, i64 4 ; index 4, not 1: breaks the consecutive load group
    121   %lane1 = load float, float* %src.gap, align 4
    122   %dst.lane1 = getelementptr inbounds float, float* %dst.cur, i64 1
    123   store float %lane1, float* %dst.lane1, align 4   ; dst[1] = src[4]
    124   %src.lane2 = getelementptr inbounds float, float* %src.cur, i64 2
    125   %lane2 = load float, float* %src.lane2, align 4
    126   %dst.lane2 = getelementptr inbounds float, float* %dst.cur, i64 2
    127   store float %lane2, float* %dst.lane2, align 4   ; dst[2] = src[2]
    128   %src.lane3 = getelementptr inbounds float, float* %src.cur, i64 3
    129   %lane3 = load float, float* %src.lane3, align 4
    130   %dst.lane3 = getelementptr inbounds float, float* %dst.cur, i64 3
    131   store float %lane3, float* %dst.lane3, align 4   ; dst[3] = src[3]
    132   %src.next = getelementptr inbounds float, float* %src.cur, i64 %iv ; both cursors advance by the current counter value
    133   %dst.next = getelementptr inbounds float, float* %dst.cur, i64 %iv
    134   %iv.next = add i64 %iv, 1
    135   %done = icmp eq i64 %iv.next, %count
    136   br i1 %done, label %exit, label %loop
    137 
    138 exit:                                             ; preds = %loop, %entry
    139   ret void
    140 }
    141 
    142 
    143 ; CHECK-LABEL: store_splat
    144 ; CHECK: store <4 x float>
    145 define void @store_splat(float*, float) {          ; writes the scalar %1 into four consecutive floats starting at %0
    146   %slot0 = getelementptr inbounds float, float* %0, i64 0
    147   store float %1, float* %slot0, align 4           ; p[0] = v
    148   %slot1 = getelementptr inbounds float, float* %0, i64 1
    149   store float %1, float* %slot1, align 4           ; p[1] = v
    150   %slot2 = getelementptr inbounds float, float* %0, i64 2
    151   store float %1, float* %slot2, align 4           ; p[2] = v
    152   %slot3 = getelementptr inbounds float, float* %0, i64 3
    153   store float %1, float* %slot3, align 4           ; p[3] = v
    154   ret void
    155 }
    156 
    157 
    158 ; CHECK-LABEL: store_const
    159 ; CHECK: store <4 x i32>
    160 define void @store_const(i32* %a) {                ; writes the constants 10, 30, 20, 40 to a[0..3]
    161 start:
    162   %elt0 = getelementptr inbounds i32, i32* %a, i64 0
    163   store i32 10, i32* %elt0, align 4                ; a[0] = 10
    164   %elt1 = getelementptr inbounds i32, i32* %a, i64 1
    165   store i32 30, i32* %elt1, align 4                ; a[1] = 30
    166   %elt2 = getelementptr inbounds i32, i32* %a, i64 2
    167   store i32 20, i32* %elt2, align 4                ; a[2] = 20
    168   %elt3 = getelementptr inbounds i32, i32* %a, i64 3
    169   store i32 40, i32* %elt3, align 4                ; a[3] = 40
    170   ret void
    171 }
    172