; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s

; Runs the loop vectorizer with -vector-library=Accelerate over small scalar
; float loops. The CHECK lines in each test state whether a <4 x float> vector
; call is expected (or must not appear) for the scalar call in that loop.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

      6 ;CHECK-LABEL: @sqrt_f32(
      7 ;CHECK: vsqrtf{{.*}}<4 x float>
      8 ;CHECK: ret void
      9 declare float @sqrtf(float) nounwind readnone
     10 define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     11 entry:
     12   %cmp6 = icmp sgt i32 %n, 0
     13   br i1 %cmp6, label %for.body, label %for.end
     14 
     15 for.body:                                         ; preds = %entry, %for.body
     16   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     17   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
     18   %0 = load float, float* %arrayidx, align 4
     19   %call = tail call float @sqrtf(float %0) nounwind readnone
     20   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
     21   store float %call, float* %arrayidx2, align 4
     22   %indvars.iv.next = add i64 %indvars.iv, 1
     23   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     24   %exitcond = icmp eq i32 %lftr.wideiv, %n
     25   br i1 %exitcond, label %for.end, label %for.body
     26 
     27 for.end:                                          ; preds = %for.body, %entry
     28   ret void
     29 }
     30 
     31 ;CHECK-LABEL: @exp_f32(
     32 ;CHECK: vexpf{{.*}}<4 x float>
     33 ;CHECK: ret void
     34 declare float @expf(float) nounwind readnone
     35 define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     36 entry:
     37   %cmp6 = icmp sgt i32 %n, 0
     38   br i1 %cmp6, label %for.body, label %for.end
     39 
     40 for.body:                                         ; preds = %entry, %for.body
     41   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     42   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
     43   %0 = load float, float* %arrayidx, align 4
     44   %call = tail call float @expf(float %0) nounwind readnone
     45   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
     46   store float %call, float* %arrayidx2, align 4
     47   %indvars.iv.next = add i64 %indvars.iv, 1
     48   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     49   %exitcond = icmp eq i32 %lftr.wideiv, %n
     50   br i1 %exitcond, label %for.end, label %for.body
     51 
     52 for.end:                                          ; preds = %for.body, %entry
     53   ret void
     54 }
     55 
     56 ;CHECK-LABEL: @log_f32(
     57 ;CHECK: vlogf{{.*}}<4 x float>
     58 ;CHECK: ret void
     59 declare float @logf(float) nounwind readnone
     60 define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     61 entry:
     62   %cmp6 = icmp sgt i32 %n, 0
     63   br i1 %cmp6, label %for.body, label %for.end
     64 
     65 for.body:                                         ; preds = %entry, %for.body
     66   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     67   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
     68   %0 = load float, float* %arrayidx, align 4
     69   %call = tail call float @logf(float %0) nounwind readnone
     70   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
     71   store float %call, float* %arrayidx2, align 4
     72   %indvars.iv.next = add i64 %indvars.iv, 1
     73   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     74   %exitcond = icmp eq i32 %lftr.wideiv, %n
     75   br i1 %exitcond, label %for.end, label %for.body
     76 
     77 for.end:                                          ; preds = %for.body, %entry
     78   ret void
     79 }
     80 
     81 ; For abs instruction we'll generate vector intrinsic, as it's cheaper than a lib call.
     82 ;CHECK-LABEL: @fabs_f32(
     83 ;CHECK: fabs{{.*}}<4 x float>
     84 ;CHECK: ret void
     85 declare float @fabsf(float) nounwind readnone
     86 define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     87 entry:
     88   %cmp6 = icmp sgt i32 %n, 0
     89   br i1 %cmp6, label %for.body, label %for.end
     90 
     91 for.body:                                         ; preds = %entry, %for.body
     92   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     93   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
     94   %0 = load float, float* %arrayidx, align 4
     95   %call = tail call float @fabsf(float %0) nounwind readnone
     96   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
     97   store float %call, float* %arrayidx2, align 4
     98   %indvars.iv.next = add i64 %indvars.iv, 1
     99   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    100   %exitcond = icmp eq i32 %lftr.wideiv, %n
    101   br i1 %exitcond, label %for.end, label %for.body
    102 
    103 for.end:                                          ; preds = %for.body, %entry
    104   ret void
    105 }
    106 
    107 ; Test that we can vectorize an intrinsic into a vector call.
    108 ;CHECK-LABEL: @exp_f32_intrin(
    109 ;CHECK: vexpf{{.*}}<4 x float>
    110 ;CHECK: ret void
    111 declare float @llvm.exp.f32(float) nounwind readnone
    112 define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    113 entry:
    114   %cmp6 = icmp sgt i32 %n, 0
    115   br i1 %cmp6, label %for.body, label %for.end
    116 
    117 for.body:                                         ; preds = %entry, %for.body
    118   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    119   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
    120   %0 = load float, float* %arrayidx, align 4
    121   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
    122   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
    123   store float %call, float* %arrayidx2, align 4
    124   %indvars.iv.next = add i64 %indvars.iv, 1
    125   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    126   %exitcond = icmp eq i32 %lftr.wideiv, %n
    127   br i1 %exitcond, label %for.end, label %for.body
    128 
    129 for.end:                                          ; preds = %for.body, %entry
    130   ret void
    131 }
    132 
    133 ; Test that we don't vectorize arbitrary functions.
    134 ;CHECK-LABEL: @foo_f32(
    135 ;CHECK-NOT: foo{{.*}}<4 x float>
    136 ;CHECK: ret void
    137 declare float @foo(float) nounwind readnone
    138 define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    139 entry:
    140   %cmp6 = icmp sgt i32 %n, 0
    141   br i1 %cmp6, label %for.body, label %for.end
    142 
    143 for.body:                                         ; preds = %entry, %for.body
    144   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    145   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
    146   %0 = load float, float* %arrayidx, align 4
    147   %call = tail call float @foo(float %0) nounwind readnone
    148   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
    149   store float %call, float* %arrayidx2, align 4
    150   %indvars.iv.next = add i64 %indvars.iv, 1
    151   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    152   %exitcond = icmp eq i32 %lftr.wideiv, %n
    153   br i1 %exitcond, label %for.end, label %for.body
    154 
    155 for.end:                                          ; preds = %for.body, %entry
    156   ret void
    157 }
    158 
    159 ; Test that we don't vectorize calls with nobuiltin attribute.
    160 ;CHECK-LABEL: @sqrt_f32_nobuiltin(
    161 ;CHECK-NOT: vsqrtf{{.*}}<4 x float>
    162 ;CHECK: ret void
    163 define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    164 entry:
    165   %cmp6 = icmp sgt i32 %n, 0
    166   br i1 %cmp6, label %for.body, label %for.end
    167 
    168 for.body:                                         ; preds = %entry, %for.body
    169   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    170   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
    171   %0 = load float, float* %arrayidx, align 4
    172   %call = tail call float @sqrtf(float %0) nounwind readnone nobuiltin
    173   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
    174   store float %call, float* %arrayidx2, align 4
    175   %indvars.iv.next = add i64 %indvars.iv, 1
    176   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    177   %exitcond = icmp eq i32 %lftr.wideiv, %n
    178   br i1 %exitcond, label %for.end, label %for.body
    179 
    180 for.end:                                          ; preds = %for.body, %entry
    181   ret void
    182 }
    183