; RUN: opt < %s -vector-library=Accelerate -loop-vectorize -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Each function below is the same counted loop: load a float from %y, pass it
; through one scalar math call, and store the result to the same index of %x.
; The loop is skipped entirely when %n is not positive.

; Scalar sqrtf call: the vectorized body is expected to reference vsqrtf with
; <4 x float> operands.
;CHECK-LABEL: @sqrt_f32(
;CHECK: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
declare float @sqrtf(float) nounwind readnone
define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @sqrtf(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  ; The trip count is compared in i32, so the 64-bit counter is truncated first.
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

; Scalar expf call: the vectorized body is expected to reference vexpf with
; <4 x float> operands.
;CHECK-LABEL: @exp_f32(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @expf(float) nounwind readnone
define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @expf(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

; Scalar logf call: the vectorized body is expected to reference vlogf with
; <4 x float> operands.
;CHECK-LABEL: @log_f32(
;CHECK: vlogf{{.*}}<4 x float>
;CHECK: ret void
declare float @logf(float) nounwind readnone
define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @logf(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

; For the fabsf call we expect a vector intrinsic to be generated, as it is cheaper than a lib call.
; The positive pattern names the llvm.fabs intrinsic explicitly. A looser
; "fabs" pattern would also be satisfied by a vector library routine whose
; name merely contains that substring, which is exactly the lowering this
; test is meant to rule out. The vfabsf guards on both sides of the positive
; match make the test fail if such a library call is emitted in the function.
;CHECK-LABEL: @fabs_f32(
;CHECK-NOT: vfabsf
;CHECK: llvm.fabs{{.*}}<4 x float>
;CHECK-NOT: vfabsf
;CHECK: ret void
declare float @fabsf(float) nounwind readnone
define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; Skip the loop entirely when %n is not positive.
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; x[i] = fabsf(y[i])
  %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %call = tail call float @fabsf(float %0) nounwind readnone
  %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
  store float %call, float* %arrayidx2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares against the i32 trip count, hence the truncation.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret void
}

; Test that we can vectorize an intrinsic into a vector call.
; The scalar callee here is the llvm.exp.f32 intrinsic rather than a libm
; function; the vectorized body is still expected to reference vexpf with
; <4 x float> operands.
;CHECK-LABEL: @exp_f32_intrin(
;CHECK: vexpf{{.*}}<4 x float>
;CHECK: ret void
declare float @llvm.exp.f32(float) nounwind readnone
define void @exp_f32_intrin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; No iterations are executed unless %n is strictly positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @llvm.exp.f32(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  ; Truncate the 64-bit counter so it can be compared with the i32 bound.
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

; Test that we don't vectorize arbitrary functions.
; @foo is an ordinary external declaration, so no call to it taking
; <4 x float> operands may appear before the function returns.
;CHECK-LABEL: @foo_f32(
;CHECK-NOT: foo{{.*}}<4 x float>
;CHECK: ret void
declare float @foo(float) nounwind readnone
define void @foo_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; No iterations are executed unless %n is strictly positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @foo(float %in) nounwind readnone
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  ; Truncate the 64-bit counter so it can be compared with the i32 bound.
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}

; Test that we don't vectorize calls with the nobuiltin attribute.
; Same loop shape as the plain sqrtf test, but the call site carries
; nobuiltin, so no vsqrtf reference with <4 x float> operands may appear
; before the function returns. @sqrtf itself is declared earlier in this file.
;CHECK-LABEL: @sqrt_f32_nobuiltin(
;CHECK-NOT: vsqrtf{{.*}}<4 x float>
;CHECK: ret void
define void @sqrt_f32_nobuiltin(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
entry:
  ; No iterations are executed unless %n is strictly positive.
  %has.iters = icmp sgt i32 %n, 0
  br i1 %has.iters, label %loop, label %exit

loop:                                             ; preds = %entry, %loop
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %src = getelementptr inbounds float, float* %y, i64 %iv
  %in = load float, float* %src, align 4
  %out = tail call float @sqrtf(float %in) nounwind readnone nobuiltin
  %dst = getelementptr inbounds float, float* %x, i64 %iv
  store float %out, float* %dst, align 4
  %iv.next = add i64 %iv, 1
  ; Truncate the 64-bit counter so it can be compared with the i32 bound.
  %iv.trunc = trunc i64 %iv.next to i32
  %done = icmp eq i32 %iv.trunc, %n
  br i1 %done, label %exit, label %loop

exit:                                             ; preds = %loop, %entry
  ret void
}