; Listing of a LoopVectorize regression test (source-browser page header removed; line-number gutter retained).
      1 ; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
        ; The command above runs opt's loop vectorizer with -force-vector-width=4 and
        ; -force-vector-unroll=1, followed by dce and instcombine, and pipes the
        ; textual IR (-S) into FileCheck, which reads its directives from this file.
      2 
        ; Every function below is compiled with this data layout and target triple.
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-unknown-linux-gnu"
      5 
      6 ;CHECK-LABEL: @sqrt_f32(
      7 ;CHECK: llvm.sqrt.v4f32
      8 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.sqrt.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.sqrt.v4f32
        ; and then "ret void" in the output for this function.
      9 define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     10 entry:
     11   %cmp6 = icmp sgt i32 %n, 0
     12   br i1 %cmp6, label %for.body, label %for.end
     13 
     14 for.body:                                         ; preds = %entry, %for.body
     15   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     16   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
     17   %0 = load float* %arrayidx, align 4
     18   %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
     19   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
     20   store float %call, float* %arrayidx2, align 4
     21   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
     22   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     23   %exitcond = icmp eq i32 %lftr.wideiv, %n
     24   br i1 %exitcond, label %for.end, label %for.body
     25 
     26 for.end:                                          ; preds = %for.body, %entry
     27   ret void
     28 }
     29 
     30 declare float @llvm.sqrt.f32(float) nounwind readnone
     31 
     32 ;CHECK-LABEL: @sqrt_f64(
     33 ;CHECK: llvm.sqrt.v4f64
     34 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.sqrt.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.sqrt.v4f64
        ; and then "ret void" in the output for this function.
     35 define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
     36 entry:
     37   %cmp6 = icmp sgt i32 %n, 0
     38   br i1 %cmp6, label %for.body, label %for.end
     39 
     40 for.body:                                         ; preds = %entry, %for.body
     41   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     42   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
     43   %0 = load double* %arrayidx, align 8
     44   %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
     45   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
     46   store double %call, double* %arrayidx2, align 8
     47   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
     48   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     49   %exitcond = icmp eq i32 %lftr.wideiv, %n
     50   br i1 %exitcond, label %for.end, label %for.body
     51 
     52 for.end:                                          ; preds = %for.body, %entry
     53   ret void
     54 }
     55 
     56 declare double @llvm.sqrt.f64(double) nounwind readnone
     57 
     58 ;CHECK-LABEL: @sin_f32(
     59 ;CHECK: llvm.sin.v4f32
     60 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.sin.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.sin.v4f32
        ; and then "ret void" in the output for this function.
     61 define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
     62 entry:
     63   %cmp6 = icmp sgt i32 %n, 0
     64   br i1 %cmp6, label %for.body, label %for.end
     65 
     66 for.body:                                         ; preds = %entry, %for.body
     67   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     68   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
     69   %0 = load float* %arrayidx, align 4
     70   %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
     71   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
     72   store float %call, float* %arrayidx2, align 4
     73   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
     74   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
     75   %exitcond = icmp eq i32 %lftr.wideiv, %n
     76   br i1 %exitcond, label %for.end, label %for.body
     77 
     78 for.end:                                          ; preds = %for.body, %entry
     79   ret void
     80 }
     81 
     82 declare float @llvm.sin.f32(float) nounwind readnone
     83 
     84 ;CHECK-LABEL: @sin_f64(
     85 ;CHECK: llvm.sin.v4f64
     86 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.sin.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.sin.v4f64
        ; and then "ret void" in the output for this function.
     87 define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
     88 entry:
     89   %cmp6 = icmp sgt i32 %n, 0
     90   br i1 %cmp6, label %for.body, label %for.end
     91 
     92 for.body:                                         ; preds = %entry, %for.body
     93   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
     94   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
     95   %0 = load double* %arrayidx, align 8
     96   %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
     97   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
     98   store double %call, double* %arrayidx2, align 8
     99   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    100   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    101   %exitcond = icmp eq i32 %lftr.wideiv, %n
    102   br i1 %exitcond, label %for.end, label %for.body
    103 
    104 for.end:                                          ; preds = %for.body, %entry
    105   ret void
    106 }
    107 
    108 declare double @llvm.sin.f64(double) nounwind readnone
    109 
    110 ;CHECK-LABEL: @cos_f32(
    111 ;CHECK: llvm.cos.v4f32
    112 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.cos.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.cos.v4f32
        ; and then "ret void" in the output for this function.
    113 define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    114 entry:
    115   %cmp6 = icmp sgt i32 %n, 0
    116   br i1 %cmp6, label %for.body, label %for.end
    117 
    118 for.body:                                         ; preds = %entry, %for.body
    119   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    120   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    121   %0 = load float* %arrayidx, align 4
    122   %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
    123   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    124   store float %call, float* %arrayidx2, align 4
    125   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    126   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    127   %exitcond = icmp eq i32 %lftr.wideiv, %n
    128   br i1 %exitcond, label %for.end, label %for.body
    129 
    130 for.end:                                          ; preds = %for.body, %entry
    131   ret void
    132 }
    133 
    134 declare float @llvm.cos.f32(float) nounwind readnone
    135 
    136 ;CHECK-LABEL: @cos_f64(
    137 ;CHECK: llvm.cos.v4f64
    138 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.cos.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.cos.v4f64
        ; and then "ret void" in the output for this function.
    139 define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    140 entry:
    141   %cmp6 = icmp sgt i32 %n, 0
    142   br i1 %cmp6, label %for.body, label %for.end
    143 
    144 for.body:                                         ; preds = %entry, %for.body
    145   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    146   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    147   %0 = load double* %arrayidx, align 8
    148   %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
    149   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    150   store double %call, double* %arrayidx2, align 8
    151   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    152   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    153   %exitcond = icmp eq i32 %lftr.wideiv, %n
    154   br i1 %exitcond, label %for.end, label %for.body
    155 
    156 for.end:                                          ; preds = %for.body, %entry
    157   ret void
    158 }
    159 
    160 declare double @llvm.cos.f64(double) nounwind readnone
    161 
    162 ;CHECK-LABEL: @exp_f32(
    163 ;CHECK: llvm.exp.v4f32
    164 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.exp.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.exp.v4f32
        ; and then "ret void" in the output for this function.
    165 define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    166 entry:
    167   %cmp6 = icmp sgt i32 %n, 0
    168   br i1 %cmp6, label %for.body, label %for.end
    169 
    170 for.body:                                         ; preds = %entry, %for.body
    171   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    172   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    173   %0 = load float* %arrayidx, align 4
    174   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
    175   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    176   store float %call, float* %arrayidx2, align 4
    177   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    178   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    179   %exitcond = icmp eq i32 %lftr.wideiv, %n
    180   br i1 %exitcond, label %for.end, label %for.body
    181 
    182 for.end:                                          ; preds = %for.body, %entry
    183   ret void
    184 }
    185 
    186 declare float @llvm.exp.f32(float) nounwind readnone
    187 
    188 ;CHECK-LABEL: @exp_f64(
    189 ;CHECK: llvm.exp.v4f64
    190 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.exp.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.exp.v4f64
        ; and then "ret void" in the output for this function.
    191 define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    192 entry:
    193   %cmp6 = icmp sgt i32 %n, 0
    194   br i1 %cmp6, label %for.body, label %for.end
    195 
    196 for.body:                                         ; preds = %entry, %for.body
    197   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    198   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    199   %0 = load double* %arrayidx, align 8
    200   %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
    201   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    202   store double %call, double* %arrayidx2, align 8
    203   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    204   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    205   %exitcond = icmp eq i32 %lftr.wideiv, %n
    206   br i1 %exitcond, label %for.end, label %for.body
    207 
    208 for.end:                                          ; preds = %for.body, %entry
    209   ret void
    210 }
    211 
    212 declare double @llvm.exp.f64(double) nounwind readnone
    213 
    214 ;CHECK-LABEL: @exp2_f32(
    215 ;CHECK: llvm.exp2.v4f32
    216 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.exp2.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.exp2.v4f32
        ; and then "ret void" in the output for this function.
    217 define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    218 entry:
    219   %cmp6 = icmp sgt i32 %n, 0
    220   br i1 %cmp6, label %for.body, label %for.end
    221 
    222 for.body:                                         ; preds = %entry, %for.body
    223   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    224   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    225   %0 = load float* %arrayidx, align 4
    226   %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
    227   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    228   store float %call, float* %arrayidx2, align 4
    229   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    230   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    231   %exitcond = icmp eq i32 %lftr.wideiv, %n
    232   br i1 %exitcond, label %for.end, label %for.body
    233 
    234 for.end:                                          ; preds = %for.body, %entry
    235   ret void
    236 }
    237 
    238 declare float @llvm.exp2.f32(float) nounwind readnone
    239 
    240 ;CHECK-LABEL: @exp2_f64(
    241 ;CHECK: llvm.exp2.v4f64
    242 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.exp2.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.exp2.v4f64
        ; and then "ret void" in the output for this function.
    243 define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    244 entry:
    245   %cmp6 = icmp sgt i32 %n, 0
    246   br i1 %cmp6, label %for.body, label %for.end
    247 
    248 for.body:                                         ; preds = %entry, %for.body
    249   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    250   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    251   %0 = load double* %arrayidx, align 8
    252   %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
    253   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    254   store double %call, double* %arrayidx2, align 8
    255   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    256   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    257   %exitcond = icmp eq i32 %lftr.wideiv, %n
    258   br i1 %exitcond, label %for.end, label %for.body
    259 
    260 for.end:                                          ; preds = %for.body, %entry
    261   ret void
    262 }
    263 
    264 declare double @llvm.exp2.f64(double) nounwind readnone
    265 
    266 ;CHECK-LABEL: @log_f32(
    267 ;CHECK: llvm.log.v4f32
    268 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log.v4f32
        ; and then "ret void" in the output for this function.
    269 define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    270 entry:
    271   %cmp6 = icmp sgt i32 %n, 0
    272   br i1 %cmp6, label %for.body, label %for.end
    273 
    274 for.body:                                         ; preds = %entry, %for.body
    275   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    276   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    277   %0 = load float* %arrayidx, align 4
    278   %call = tail call float @llvm.log.f32(float %0) nounwind readnone
    279   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    280   store float %call, float* %arrayidx2, align 4
    281   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    282   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    283   %exitcond = icmp eq i32 %lftr.wideiv, %n
    284   br i1 %exitcond, label %for.end, label %for.body
    285 
    286 for.end:                                          ; preds = %for.body, %entry
    287   ret void
    288 }
    289 
    290 declare float @llvm.log.f32(float) nounwind readnone
    291 
    292 ;CHECK-LABEL: @log_f64(
    293 ;CHECK: llvm.log.v4f64
    294 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log.v4f64
        ; and then "ret void" in the output for this function.
    295 define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    296 entry:
    297   %cmp6 = icmp sgt i32 %n, 0
    298   br i1 %cmp6, label %for.body, label %for.end
    299 
    300 for.body:                                         ; preds = %entry, %for.body
    301   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    302   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    303   %0 = load double* %arrayidx, align 8
    304   %call = tail call double @llvm.log.f64(double %0) nounwind readnone
    305   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    306   store double %call, double* %arrayidx2, align 8
    307   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    308   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    309   %exitcond = icmp eq i32 %lftr.wideiv, %n
    310   br i1 %exitcond, label %for.end, label %for.body
    311 
    312 for.end:                                          ; preds = %for.body, %entry
    313   ret void
    314 }
    315 
    316 declare double @llvm.log.f64(double) nounwind readnone
    317 
    318 ;CHECK-LABEL: @log10_f32(
    319 ;CHECK: llvm.log10.v4f32
    320 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log10.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log10.v4f32
        ; and then "ret void" in the output for this function.
    321 define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    322 entry:
    323   %cmp6 = icmp sgt i32 %n, 0
    324   br i1 %cmp6, label %for.body, label %for.end
    325 
    326 for.body:                                         ; preds = %entry, %for.body
    327   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    328   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    329   %0 = load float* %arrayidx, align 4
    330   %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
    331   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    332   store float %call, float* %arrayidx2, align 4
    333   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    334   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    335   %exitcond = icmp eq i32 %lftr.wideiv, %n
    336   br i1 %exitcond, label %for.end, label %for.body
    337 
    338 for.end:                                          ; preds = %for.body, %entry
    339   ret void
    340 }
    341 
    342 declare float @llvm.log10.f32(float) nounwind readnone
    343 
    344 ;CHECK-LABEL: @log10_f64(
    345 ;CHECK: llvm.log10.v4f64
    346 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log10.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log10.v4f64
        ; and then "ret void" in the output for this function.
    347 define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    348 entry:
    349   %cmp6 = icmp sgt i32 %n, 0
    350   br i1 %cmp6, label %for.body, label %for.end
    351 
    352 for.body:                                         ; preds = %entry, %for.body
    353   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    354   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    355   %0 = load double* %arrayidx, align 8
    356   %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
    357   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    358   store double %call, double* %arrayidx2, align 8
    359   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    360   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    361   %exitcond = icmp eq i32 %lftr.wideiv, %n
    362   br i1 %exitcond, label %for.end, label %for.body
    363 
    364 for.end:                                          ; preds = %for.body, %entry
    365   ret void
    366 }
    367 
    368 declare double @llvm.log10.f64(double) nounwind readnone
    369 
    370 ;CHECK-LABEL: @log2_f32(
    371 ;CHECK: llvm.log2.v4f32
    372 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log2.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log2.v4f32
        ; and then "ret void" in the output for this function.
    373 define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    374 entry:
    375   %cmp6 = icmp sgt i32 %n, 0
    376   br i1 %cmp6, label %for.body, label %for.end
    377 
    378 for.body:                                         ; preds = %entry, %for.body
    379   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    380   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    381   %0 = load float* %arrayidx, align 4
    382   %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
    383   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    384   store float %call, float* %arrayidx2, align 4
    385   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    386   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    387   %exitcond = icmp eq i32 %lftr.wideiv, %n
    388   br i1 %exitcond, label %for.end, label %for.body
    389 
    390 for.end:                                          ; preds = %for.body, %entry
    391   ret void
    392 }
    393 
    394 declare float @llvm.log2.f32(float) nounwind readnone
    395 
    396 ;CHECK-LABEL: @log2_f64(
    397 ;CHECK: llvm.log2.v4f64
    398 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.log2.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.log2.v4f64
        ; and then "ret void" in the output for this function.
    399 define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    400 entry:
    401   %cmp6 = icmp sgt i32 %n, 0
    402   br i1 %cmp6, label %for.body, label %for.end
    403 
    404 for.body:                                         ; preds = %entry, %for.body
    405   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    406   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    407   %0 = load double* %arrayidx, align 8
    408   %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
    409   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    410   store double %call, double* %arrayidx2, align 8
    411   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    412   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    413   %exitcond = icmp eq i32 %lftr.wideiv, %n
    414   br i1 %exitcond, label %for.end, label %for.body
    415 
    416 for.end:                                          ; preds = %for.body, %entry
    417   ret void
    418 }
    419 
    420 declare double @llvm.log2.f64(double) nounwind readnone
    421 
    422 ;CHECK-LABEL: @fabs_f32(
    423 ;CHECK: llvm.fabs.v4f32
    424 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.fabs.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.fabs.v4f32
        ; and then "ret void" in the output for this function.
    425 define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    426 entry:
    427   %cmp6 = icmp sgt i32 %n, 0
    428   br i1 %cmp6, label %for.body, label %for.end
    429 
    430 for.body:                                         ; preds = %entry, %for.body
    431   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    432   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    433   %0 = load float* %arrayidx, align 4
    434   %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
    435   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    436   store float %call, float* %arrayidx2, align 4
    437   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    438   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    439   %exitcond = icmp eq i32 %lftr.wideiv, %n
    440   br i1 %exitcond, label %for.end, label %for.body
    441 
    442 for.end:                                          ; preds = %for.body, %entry
    443   ret void
    444 }
    445 
    446 declare float @llvm.fabs.f32(float) nounwind readnone
    447 
        ; Input loop for the vectorizer: x[i] = llvm.fabs.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives below expect llvm.fabs.v4f64
        ; and then "ret void" in the output for this function. The intrinsic is
        ; spelled with its .f64 type suffix, like every other f64 test in this file.
        ;CHECK-LABEL: @fabs_f64(
        ;CHECK: llvm.fabs.v4f64
        ;CHECK: ret void
    448 define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    449 entry:
    450   %cmp6 = icmp sgt i32 %n, 0
    451   br i1 %cmp6, label %for.body, label %for.end
    452 
    453 for.body:                                         ; preds = %entry, %for.body
    454   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    455   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    456   %0 = load double* %arrayidx, align 8
    457   %call = tail call double @llvm.fabs.f64(double %0) nounwind readnone
    458   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    459   store double %call, double* %arrayidx2, align 8
    460   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    461   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    462   %exitcond = icmp eq i32 %lftr.wideiv, %n
    463   br i1 %exitcond, label %for.end, label %for.body
    464 
    465 for.end:                                          ; preds = %for.body, %entry
    466   ret void
    467 }
    468 
    469 declare double @llvm.fabs.f64(double) nounwind readnone
    470 
    471 ;CHECK-LABEL: @copysign_f32(
    472 ;CHECK: llvm.copysign.v4f32
    473 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.copysign.f32(y[i], z[i]) for
        ; i = 0 .. n-1, skipped entirely when n <= 0. The directives above expect
        ; llvm.copysign.v4f32 and then "ret void" in the output for this function.
    474 define void @copysign_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
    475 entry:
    476   %cmp6 = icmp sgt i32 %n, 0
    477   br i1 %cmp6, label %for.body, label %for.end
    478 
    479 for.body:                                         ; preds = %entry, %for.body
    480   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    481   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    482   %0 = load float* %arrayidx, align 4
    483   %arrayidx1 = getelementptr inbounds float* %z, i64 %indvars.iv
    484   %1 = load float* %arrayidx1, align 4
    485   %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
    486   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    487   store float %call, float* %arrayidx2, align 4
    488   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    489   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    490   %exitcond = icmp eq i32 %lftr.wideiv, %n
    491   br i1 %exitcond, label %for.end, label %for.body
    492 
    493 for.end:                                          ; preds = %for.body, %entry
    494   ret void
    495 }
    496 
    497 declare float @llvm.copysign.f32(float, float) nounwind readnone
    498 
        ; Input loop for the vectorizer: x[i] = llvm.copysign.f64(y[i], z[i]) for
        ; i = 0 .. n-1, skipped entirely when n <= 0. The directives below expect
        ; llvm.copysign.v4f64 and then "ret void" in the output for this function.
        ; The intrinsic is spelled with its .f64 type suffix, like every other f64
        ; test in this file.
        ;CHECK-LABEL: @copysign_f64(
        ;CHECK: llvm.copysign.v4f64
        ;CHECK: ret void
    499 define void @copysign_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
    500 entry:
    501   %cmp6 = icmp sgt i32 %n, 0
    502   br i1 %cmp6, label %for.body, label %for.end
    503 
    504 for.body:                                         ; preds = %entry, %for.body
    505   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    506   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    507   %0 = load double* %arrayidx, align 8
    508   %arrayidx1 = getelementptr inbounds double* %z, i64 %indvars.iv
        ; The sign operand comes from z[i] (%arrayidx1), matching the f32 variant.
    509   %1 = load double* %arrayidx1, align 8
    510   %call = tail call double @llvm.copysign.f64(double %0, double %1) nounwind readnone
    511   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    512   store double %call, double* %arrayidx2, align 8
    513   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    514   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    515   %exitcond = icmp eq i32 %lftr.wideiv, %n
    516   br i1 %exitcond, label %for.end, label %for.body
    517 
    518 for.end:                                          ; preds = %for.body, %entry
    519   ret void
    520 }
    521 
    522 declare double @llvm.copysign.f64(double, double) nounwind readnone
    523 
    524 ;CHECK-LABEL: @floor_f32(
    525 ;CHECK: llvm.floor.v4f32
    526 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.floor.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.floor.v4f32
        ; and then "ret void" in the output for this function.
    527 define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    528 entry:
    529   %cmp6 = icmp sgt i32 %n, 0
    530   br i1 %cmp6, label %for.body, label %for.end
    531 
    532 for.body:                                         ; preds = %entry, %for.body
    533   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    534   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    535   %0 = load float* %arrayidx, align 4
    536   %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
    537   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    538   store float %call, float* %arrayidx2, align 4
    539   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    540   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    541   %exitcond = icmp eq i32 %lftr.wideiv, %n
    542   br i1 %exitcond, label %for.end, label %for.body
    543 
    544 for.end:                                          ; preds = %for.body, %entry
    545   ret void
    546 }
    547 
    548 declare float @llvm.floor.f32(float) nounwind readnone
    549 
    550 ;CHECK-LABEL: @floor_f64(
    551 ;CHECK: llvm.floor.v4f64
    552 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.floor.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.floor.v4f64
        ; and then "ret void" in the output for this function.
    553 define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    554 entry:
    555   %cmp6 = icmp sgt i32 %n, 0
    556   br i1 %cmp6, label %for.body, label %for.end
    557 
    558 for.body:                                         ; preds = %entry, %for.body
    559   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    560   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    561   %0 = load double* %arrayidx, align 8
    562   %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
    563   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    564   store double %call, double* %arrayidx2, align 8
    565   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    566   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    567   %exitcond = icmp eq i32 %lftr.wideiv, %n
    568   br i1 %exitcond, label %for.end, label %for.body
    569 
    570 for.end:                                          ; preds = %for.body, %entry
    571   ret void
    572 }
    573 
    574 declare double @llvm.floor.f64(double) nounwind readnone
    575 
    576 ;CHECK-LABEL: @ceil_f32(
    577 ;CHECK: llvm.ceil.v4f32
    578 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.ceil.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.ceil.v4f32
        ; and then "ret void" in the output for this function.
    579 define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    580 entry:
    581   %cmp6 = icmp sgt i32 %n, 0
    582   br i1 %cmp6, label %for.body, label %for.end
    583 
    584 for.body:                                         ; preds = %entry, %for.body
    585   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    586   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    587   %0 = load float* %arrayidx, align 4
    588   %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
    589   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    590   store float %call, float* %arrayidx2, align 4
    591   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    592   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    593   %exitcond = icmp eq i32 %lftr.wideiv, %n
    594   br i1 %exitcond, label %for.end, label %for.body
    595 
    596 for.end:                                          ; preds = %for.body, %entry
    597   ret void
    598 }
    599 
    600 declare float @llvm.ceil.f32(float) nounwind readnone
    601 
    602 ;CHECK-LABEL: @ceil_f64(
    603 ;CHECK: llvm.ceil.v4f64
    604 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.ceil.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.ceil.v4f64
        ; and then "ret void" in the output for this function.
    605 define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    606 entry:
    607   %cmp6 = icmp sgt i32 %n, 0
    608   br i1 %cmp6, label %for.body, label %for.end
    609 
    610 for.body:                                         ; preds = %entry, %for.body
    611   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    612   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    613   %0 = load double* %arrayidx, align 8
    614   %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
    615   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    616   store double %call, double* %arrayidx2, align 8
    617   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 and compared against %n.
    618   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    619   %exitcond = icmp eq i32 %lftr.wideiv, %n
    620   br i1 %exitcond, label %for.end, label %for.body
    621 
    622 for.end:                                          ; preds = %for.body, %entry
    623   ret void
    624 }
    625 
    626 declare double @llvm.ceil.f64(double) nounwind readnone
    627 
    628 ;CHECK-LABEL: @trunc_f32(
    629 ;CHECK: llvm.trunc.v4f32
    630 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.trunc.f32(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.trunc.v4f32
        ; and then "ret void" in the output for this function.
    631 define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    632 entry:
    633   %cmp6 = icmp sgt i32 %n, 0
    634   br i1 %cmp6, label %for.body, label %for.end
    635 
    636 for.body:                                         ; preds = %entry, %for.body
    637   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    638   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    639   %0 = load float* %arrayidx, align 4
    640   %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
    641   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    642   store float %call, float* %arrayidx2, align 4
    643   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 (the integer trunc
        ; instruction, unrelated to the llvm.trunc intrinsic) and compared against %n.
    644   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    645   %exitcond = icmp eq i32 %lftr.wideiv, %n
    646   br i1 %exitcond, label %for.end, label %for.body
    647 
    648 for.end:                                          ; preds = %for.body, %entry
    649   ret void
    650 }
    651 
    652 declare float @llvm.trunc.f32(float) nounwind readnone
    653 
    654 ;CHECK-LABEL: @trunc_f64(
    655 ;CHECK: llvm.trunc.v4f64
    656 ;CHECK: ret void
        ; Input loop for the vectorizer: x[i] = llvm.trunc.f64(y[i]) for i = 0 .. n-1,
        ; skipped entirely when n <= 0. The directives above expect llvm.trunc.v4f64
        ; and then "ret void" in the output for this function.
    657 define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    658 entry:
    659   %cmp6 = icmp sgt i32 %n, 0
    660   br i1 %cmp6, label %for.body, label %for.end
    661 
    662 for.body:                                         ; preds = %entry, %for.body
    663   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    664   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    665   %0 = load double* %arrayidx, align 8
    666   %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
    667   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    668   store double %call, double* %arrayidx2, align 8
    669   %indvars.iv.next = add i64 %indvars.iv, 1
        ; Loop exit: the i64 counter is truncated to i32 (the integer trunc
        ; instruction, unrelated to the llvm.trunc intrinsic) and compared against %n.
    670   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    671   %exitcond = icmp eq i32 %lftr.wideiv, %n
    672   br i1 %exitcond, label %for.end, label %for.body
    673 
    674 for.end:                                          ; preds = %for.body, %entry
    675   ret void
    676 }
    677 
    678 declare double @llvm.trunc.f64(double) nounwind readnone
    679 
    680 ;CHECK-LABEL: @rint_f32(
    681 ;CHECK: llvm.rint.v4f32
    682 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.rint.f32(y[i]) for
    ; i = 0 .. n-1; when n <= 0 it branches straight to for.end. The FileCheck
    ; directives above this function expect llvm.rint.v4f32 in the output.
    683 define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    684 entry:
    685   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    686   br i1 %cmp6, label %for.body, label %for.end
    687 
    688 for.body:                                         ; preds = %entry, %for.body
    689   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    690   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    691   %0 = load float* %arrayidx, align 4
    692   %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
    693   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    694   store float %call, float* %arrayidx2, align 4
    695   %indvars.iv.next = add i64 %indvars.iv, 1
    696   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    697   %exitcond = icmp eq i32 %lftr.wideiv, %n
    698   br i1 %exitcond, label %for.end, label %for.body
    699 
    700 for.end:                                          ; preds = %for.body, %entry
    701   ret void
    702 }
    703 
    704 declare float @llvm.rint.f32(float) nounwind readnone
    705 
    706 ;CHECK-LABEL: @rint_f64(
    707 ;CHECK: llvm.rint.v4f64
    708 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.rint.f64(y[i]) for
    ; i = 0 .. n-1; when n <= 0 it branches straight to for.end. The FileCheck
    ; directives above this function expect llvm.rint.v4f64 in the output.
    709 define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    710 entry:
    711   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    712   br i1 %cmp6, label %for.body, label %for.end
    713 
    714 for.body:                                         ; preds = %entry, %for.body
    715   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    716   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    717   %0 = load double* %arrayidx, align 8
    718   %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
    719   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    720   store double %call, double* %arrayidx2, align 8
    721   %indvars.iv.next = add i64 %indvars.iv, 1
    722   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    723   %exitcond = icmp eq i32 %lftr.wideiv, %n
    724   br i1 %exitcond, label %for.end, label %for.body
    725 
    726 for.end:                                          ; preds = %for.body, %entry
    727   ret void
    728 }
    729 
    730 declare double @llvm.rint.f64(double) nounwind readnone
    731 
    732 ;CHECK-LABEL: @nearbyint_f32(
    733 ;CHECK: llvm.nearbyint.v4f32
    734 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.nearbyint.f32(y[i])
    ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. The
    ; FileCheck directives above this function expect llvm.nearbyint.v4f32.
    735 define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    736 entry:
    737   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    738   br i1 %cmp6, label %for.body, label %for.end
    739 
    740 for.body:                                         ; preds = %entry, %for.body
    741   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    742   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    743   %0 = load float* %arrayidx, align 4
    744   %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
    745   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    746   store float %call, float* %arrayidx2, align 4
    747   %indvars.iv.next = add i64 %indvars.iv, 1
    748   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    749   %exitcond = icmp eq i32 %lftr.wideiv, %n
    750   br i1 %exitcond, label %for.end, label %for.body
    751 
    752 for.end:                                          ; preds = %for.body, %entry
    753   ret void
    754 }
    755 
    756 declare float @llvm.nearbyint.f32(float) nounwind readnone
    757 
    758 ;CHECK-LABEL: @nearbyint_f64(
    759 ;CHECK: llvm.nearbyint.v4f64
    760 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.nearbyint.f64(y[i])
    ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. The
    ; FileCheck directives above this function expect llvm.nearbyint.v4f64.
    761 define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    762 entry:
    763   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    764   br i1 %cmp6, label %for.body, label %for.end
    765 
    766 for.body:                                         ; preds = %entry, %for.body
    767   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    768   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    769   %0 = load double* %arrayidx, align 8
    770   %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
    771   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    772   store double %call, double* %arrayidx2, align 8
    773   %indvars.iv.next = add i64 %indvars.iv, 1
    774   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    775   %exitcond = icmp eq i32 %lftr.wideiv, %n
    776   br i1 %exitcond, label %for.end, label %for.body
    777 
    778 for.end:                                          ; preds = %for.body, %entry
    779   ret void
    780 }
    781 
    782 declare double @llvm.nearbyint.f64(double) nounwind readnone
    783 
    784 ;CHECK-LABEL: @round_f32(
    785 ;CHECK: llvm.round.v4f32
    786 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.round.f32(y[i]) for
    ; i = 0 .. n-1; when n <= 0 it branches straight to for.end. The FileCheck
    ; directives above this function expect llvm.round.v4f32 in the output.
    787 define void @round_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
    788 entry:
    789   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    790   br i1 %cmp6, label %for.body, label %for.end
    791 
    792 for.body:                                         ; preds = %entry, %for.body
    793   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    794   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    795   %0 = load float* %arrayidx, align 4
    796   %call = tail call float @llvm.round.f32(float %0) nounwind readnone
    797   %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
    798   store float %call, float* %arrayidx2, align 4
    799   %indvars.iv.next = add i64 %indvars.iv, 1
    800   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    801   %exitcond = icmp eq i32 %lftr.wideiv, %n
    802   br i1 %exitcond, label %for.end, label %for.body
    803 
    804 for.end:                                          ; preds = %for.body, %entry
    805   ret void
    806 }
    807 
    808 declare float @llvm.round.f32(float) nounwind readnone
    809 
    810 ;CHECK-LABEL: @round_f64(
    811 ;CHECK: llvm.round.v4f64
    812 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.round.f64(y[i]) for
    ; i = 0 .. n-1; when n <= 0 it branches straight to for.end. The FileCheck
    ; directives above this function expect llvm.round.v4f64 in the output.
    813 define void @round_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
    814 entry:
    815   %cmp6 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    816   br i1 %cmp6, label %for.body, label %for.end
    817 
    818 for.body:                                         ; preds = %entry, %for.body
    819   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    820   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    821   %0 = load double* %arrayidx, align 8
    822   %call = tail call double @llvm.round.f64(double %0) nounwind readnone
    823   %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
    824   store double %call, double* %arrayidx2, align 8
    825   %indvars.iv.next = add i64 %indvars.iv, 1
    826   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    827   %exitcond = icmp eq i32 %lftr.wideiv, %n
    828   br i1 %exitcond, label %for.end, label %for.body
    829 
    830 for.end:                                          ; preds = %for.body, %entry
    831   ret void
    832 }
    833 
    834 declare double @llvm.round.f64(double) nounwind readnone
    835 
    836 ;CHECK-LABEL: @fma_f32(
    837 ;CHECK: llvm.fma.v4f32
    838 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs
    ; x[i] = llvm.fma.f32(y[i], z[i], w[i]) for i = 0 .. n-1; when n <= 0 it
    ; branches straight to for.end. The FileCheck directives above this
    ; function expect llvm.fma.v4f32 in the output.
    839 define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
    840 entry:
    841   %cmp12 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    842   br i1 %cmp12, label %for.body, label %for.end
    843 
    844 for.body:                                         ; preds = %entry, %for.body
    845   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    846   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    847   %0 = load float* %arrayidx, align 4
    848   %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
    849   %1 = load float* %arrayidx2, align 4
    850   %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
    851   %2 = load float* %arrayidx4, align 4
    852   %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)  ; operands: y[i], z[i], w[i]
    853   %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
    854   store float %3, float* %arrayidx6, align 4
    855   %indvars.iv.next = add i64 %indvars.iv, 1
    856   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    857   %exitcond = icmp eq i32 %lftr.wideiv, %n
    858   br i1 %exitcond, label %for.end, label %for.body
    859 
    860 for.end:                                          ; preds = %for.body, %entry
    861   ret void
    862 }
    863 
    864 declare float @llvm.fma.f32(float, float, float) nounwind readnone
    865 
    866 ;CHECK-LABEL: @fma_f64(
    867 ;CHECK: llvm.fma.v4f64
    868 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs
    ; x[i] = llvm.fma.f64(y[i], z[i], w[i]) for i = 0 .. n-1; when n <= 0 it
    ; branches straight to for.end. The FileCheck directives above this
    ; function expect llvm.fma.v4f64 in the output.
    869 define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
    870 entry:
    871   %cmp12 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    872   br i1 %cmp12, label %for.body, label %for.end
    873 
    874 for.body:                                         ; preds = %entry, %for.body
    875   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    876   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    877   %0 = load double* %arrayidx, align 8
    878   %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
    879   %1 = load double* %arrayidx2, align 8
    880   %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
    881   %2 = load double* %arrayidx4, align 8
    882   %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)  ; operands: y[i], z[i], w[i]
    883   %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
    884   store double %3, double* %arrayidx6, align 8
    885   %indvars.iv.next = add i64 %indvars.iv, 1
    886   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    887   %exitcond = icmp eq i32 %lftr.wideiv, %n
    888   br i1 %exitcond, label %for.end, label %for.body
    889 
    890 for.end:                                          ; preds = %for.body, %entry
    891   ret void
    892 }
    893 
    894 declare double @llvm.fma.f64(double, double, double) nounwind readnone
    895 
    896 ;CHECK-LABEL: @fmuladd_f32(
    897 ;CHECK: llvm.fmuladd.v4f32
    898 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs
    ; x[i] = llvm.fmuladd.f32(y[i], z[i], w[i]) for i = 0 .. n-1; when n <= 0
    ; it branches straight to for.end. The FileCheck directives above this
    ; function expect llvm.fmuladd.v4f32 in the output.
    899 define void @fmuladd_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
    900 entry:
    901   %cmp12 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    902   br i1 %cmp12, label %for.body, label %for.end
    903 
    904 for.body:                                         ; preds = %entry, %for.body
    905   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    906   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    907   %0 = load float* %arrayidx, align 4
    908   %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
    909   %1 = load float* %arrayidx2, align 4
    910   %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
    911   %2 = load float* %arrayidx4, align 4
    912   %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)  ; operands: y[i], z[i], w[i]
    913   %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
    914   store float %3, float* %arrayidx6, align 4
    915   %indvars.iv.next = add i64 %indvars.iv, 1
    916   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    917   %exitcond = icmp eq i32 %lftr.wideiv, %n
    918   br i1 %exitcond, label %for.end, label %for.body
    919 
    920 for.end:                                          ; preds = %for.body, %entry
    921   ret void
    922 }
    923 
    924 declare float @llvm.fmuladd.f32(float, float, float) nounwind readnone
    925 
    926 ;CHECK-LABEL: @fmuladd_f64(
    927 ;CHECK: llvm.fmuladd.v4f64
    928 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs
    ; x[i] = llvm.fmuladd.f64(y[i], z[i], w[i]) for i = 0 .. n-1; when n <= 0
    ; it branches straight to for.end. The FileCheck directives above this
    ; function expect llvm.fmuladd.v4f64 in the output.
    929 define void @fmuladd_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
    930 entry:
    931   %cmp12 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    932   br i1 %cmp12, label %for.body, label %for.end
    933 
    934 for.body:                                         ; preds = %entry, %for.body
    935   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    936   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    937   %0 = load double* %arrayidx, align 8
    938   %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
    939   %1 = load double* %arrayidx2, align 8
    940   %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
    941   %2 = load double* %arrayidx4, align 8
    942   %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)  ; operands: y[i], z[i], w[i]
    943   %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
    944   store double %3, double* %arrayidx6, align 8
    945   %indvars.iv.next = add i64 %indvars.iv, 1
    946   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    947   %exitcond = icmp eq i32 %lftr.wideiv, %n
    948   br i1 %exitcond, label %for.end, label %for.body
    949 
    950 for.end:                                          ; preds = %for.body, %entry
    951   ret void
    952 }
    953 
    954 declare double @llvm.fmuladd.f64(double, double, double) nounwind readnone
    955 
    956 ;CHECK-LABEL: @pow_f32(
    957 ;CHECK: llvm.pow.v4f32
    958 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.pow.f32(y[i], z[i])
    ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. The
    ; FileCheck directives above this function expect llvm.pow.v4f32.
    959 define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
    960 entry:
    961   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    962   br i1 %cmp9, label %for.body, label %for.end
    963 
    964 for.body:                                         ; preds = %entry, %for.body
    965   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    966   %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
    967   %0 = load float* %arrayidx, align 4
    968   %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
    969   %1 = load float* %arrayidx2, align 4
    970   %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone  ; base y[i], exponent z[i]
    971   %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
    972   store float %call, float* %arrayidx4, align 4
    973   %indvars.iv.next = add i64 %indvars.iv, 1
    974   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
    975   %exitcond = icmp eq i32 %lftr.wideiv, %n
    976   br i1 %exitcond, label %for.end, label %for.body
    977 
    978 for.end:                                          ; preds = %for.body, %entry
    979   ret void
    980 }
    981 
    982 declare float @llvm.pow.f32(float, float) nounwind readnone
    983 
    984 ;CHECK-LABEL: @pow_f64(
    985 ;CHECK: llvm.pow.v4f64
    986 ;CHECK: ret void
    ; Vectorizer test input: when n > 0, runs x[i] = llvm.pow.f64(y[i], z[i])
    ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. The
    ; FileCheck directives above this function expect llvm.pow.v4f64.
    987 define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
    988 entry:
    989   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
    990   br i1 %cmp9, label %for.body, label %for.end
    991 
    992 for.body:                                         ; preds = %entry, %for.body
    993   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
    994   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
    995   %0 = load double* %arrayidx, align 8
    996   %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv
    997   %1 = load double* %arrayidx2, align 8
    998   %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone  ; base y[i], exponent z[i]
    999   %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
   1000   store double %call, double* %arrayidx4, align 8
   1001   %indvars.iv.next = add i64 %indvars.iv, 1
   1002   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
   1003   %exitcond = icmp eq i32 %lftr.wideiv, %n
   1004   br i1 %exitcond, label %for.end, label %for.body
   1005 
   1006 for.end:                                          ; preds = %for.body, %entry
   1007   ret void
   1008 }
   1009 
   ; The loop below calls the libm-style function fabsf() on each of 1024
   ; floats in place (x[i] = fabsf(x[i])). The directives expect that call to
   ; be emitted as the vector intrinsic llvm.fabs.v4f32. The first directive
   ; is a label match, like the other tests in this file, so the remaining
   ; matches are confined to this function's output.
   1010 ; CHECK-LABEL: @fabs_libm(
   1011 ; CHECK:  call <4 x float> @llvm.fabs.v4f32
   1012 ; CHECK: ret void
   1013 define void @fabs_libm(float* nocapture %x) nounwind {
   1014 entry:
   1015   br label %for.body
   1016 
   1017 for.body:                                         ; preds = %entry, %for.body
   1018   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   1019   %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
   1020   %0 = load float* %arrayidx, align 4
   1021   %call = tail call float @fabsf(float %0) nounwind readnone
   1022   store float %call, float* %arrayidx, align 4  ; written back to the slot it was loaded from
   1023   %indvars.iv.next = add i64 %indvars.iv, 1
   1024   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   1025   %exitcond = icmp eq i32 %lftr.wideiv, 1024  ; fixed trip count of 1024
   1026   br i1 %exitcond, label %for.end, label %for.body
   1027 
   1028 for.end:                                          ; preds = %for.body
   1029   ret void
   1030 }
   1031 
   1032 declare float @fabsf(float) nounwind readnone
   1033 
   1034 declare double @llvm.pow.f64(double, double) nounwind readnone
   1035 
   1036 
   1037 
   1038 ; Make sure we don't replace calls to functions that have a standard library
   1039 ; function's name and signature but are defined with internal linkage.
   1040 
   ; Internal-linkage function named roundf with a float -> float signature.
   ; It ignores %x and always returns 0.0.
   1041 define internal float @roundf(float %x) nounwind readnone {
   1042   ret float 0.00000000
   1043 }
   ; The loop below calls the internal-linkage @roundf defined in this file on
   ; each of 1024 floats in place. The directives require that no
   ; <4 x float> load appears in this function's output. The label uses the
   ; same "@name(" form as the other tests in this file.
   1044 ; CHECK-LABEL: @internal_round(
   1045 ; CHECK-NOT:  load <4 x float>
   1046 
   1047 define void @internal_round(float* nocapture %x) nounwind {
   1048 entry:
   1049   br label %for.body
   1050 
   1051 for.body:                                         ; preds = %entry, %for.body
   1052   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   1053   %arrayidx = getelementptr inbounds float* %x, i64 %indvars.iv
   1054   %0 = load float* %arrayidx, align 4
   1055   %call = tail call float @roundf(float %0) nounwind readnone
   1056   store float %call, float* %arrayidx, align 4  ; written back to the slot it was loaded from
   1057   %indvars.iv.next = add i64 %indvars.iv, 1
   1058   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   1059   %exitcond = icmp eq i32 %lftr.wideiv, 1024  ; fixed trip count of 1024
   1060   br i1 %exitcond, label %for.end, label %for.body
   1061 
   1062 for.end:                                          ; preds = %for.body
   1063   ret void
   1064 }
   1065 
   1066 ; Make sure we don't replace calls to functions with standard library names but
   1067 ; different signatures.
   1068 
   1069 declare void @round(double %f)
   1070 
   ; The loop below loads each of 1024 doubles, stores it back unchanged, and
   ; passes it to @round, which is declared in this file as returning void.
   ; The directives require that no <4 x double> load appears in this
   ; function's output. The label uses the same "@name(" form as the other
   ; tests in this file.
   1071 ; CHECK-LABEL: @wrong_signature(
   1072 ; CHECK-NOT:  load <4 x double>
   1073 
   1074 define void @wrong_signature(double* nocapture %x) nounwind {
   1075 entry:
   1076   br label %for.body
   1077 
   1078 for.body:                                         ; preds = %entry, %for.body
   1079   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   1080   %arrayidx = getelementptr inbounds double* %x, i64 %indvars.iv
   1081   %0 = load double* %arrayidx, align 4
   1082   store double %0, double* %arrayidx, align 4
   1083   tail call void @round(double %0) nounwind readnone  ; void result, so nothing from the call is stored
   1084   %indvars.iv.next = add i64 %indvars.iv, 1
   1085   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   1086   %exitcond = icmp eq i32 %lftr.wideiv, 1024  ; fixed trip count of 1024
   1087   br i1 %exitcond, label %for.end, label %for.body
   1088 
   1089 for.end:                                          ; preds = %for.body
   1090   ret void
   1091 }
   1092 
   1093 declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone
   1094 
   1095 ;CHECK-LABEL: @powi_f64(
   1096 ;CHECK: llvm.powi.v4f64
   1097 ;CHECK: ret void
   ; Vectorizer test input: when n > 0, runs x[i] = llvm.powi.f64(y[i], P) for
   ; i = 0 .. n-1, where the exponent P is a function argument and therefore
   ; the same on every iteration. The FileCheck directives above this function
   ; expect llvm.powi.v4f64 in the output.
   1098 define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable {
   1099 entry:
   1100   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
   1101   br i1 %cmp9, label %for.body, label %for.end
   1102 
   1103 for.body:                                         ; preds = %entry, %for.body
   1104   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   1105   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
   1106   %0 = load double* %arrayidx, align 8
   1107   %call = tail call double @llvm.powi.f64(double %0, i32  %P) nounwind readnone
   1108   %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
   1109   store double %call, double* %arrayidx4, align 8
   1110   %indvars.iv.next = add i64 %indvars.iv, 1
   1111   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
   1112   %exitcond = icmp eq i32 %lftr.wideiv, %n
   1113   br i1 %exitcond, label %for.end, label %for.body
   1114 
   1115 for.end:                                          ; preds = %for.body, %entry
   1116   ret void
   1117 }
   1118 
   1119 ;CHECK-LABEL: @powi_f64_neg(
   1120 ;CHECK-NOT: llvm.powi.v4f64
   1121 ;CHECK: ret void
   ; Negative vectorizer test input: when n > 0, runs
   ; x[i] = llvm.powi.f64(y[i], (i32)i) for i = 0 .. n-1. Unlike @powi_f64, the
   ; exponent is derived from the induction variable, so it changes on every
   ; iteration. The FileCheck directives above this function require that
   ; llvm.powi.v4f64 does not appear in the output.
   1122 define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
   1123 entry:
   1124   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
   1125   br i1 %cmp9, label %for.body, label %for.end
   1126 
   1127 for.body:                                         ; preds = %entry, %for.body
   1128   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   1129   %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
   1130   %0 = load double* %arrayidx, align 8
   1131   %1 = trunc i64 %indvars.iv to i32  ; per-iteration exponent taken from the loop counter
   1132   %call = tail call double @llvm.powi.f64(double %0, i32  %1) nounwind readnone
   1133   %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
   1134   store double %call, double* %arrayidx4, align 8
   1135   %indvars.iv.next = add i64 %indvars.iv, 1
   1136   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
   1137   %exitcond = icmp eq i32 %lftr.wideiv, %n
   1138   br i1 %exitcond, label %for.end, label %for.body
   1139 
   1140 for.end:                                          ; preds = %for.body, %entry
   1141   ret void
   1142 }
   1143 
   1144 declare i64  @llvm.cttz.i64 (i64, i1) nounwind readnone
   1145 
   1146 ;CHECK-LABEL: @cttz_f64(
   1147 ;CHECK: llvm.cttz.v4i64
   1148 ;CHECK: ret void
   ; Vectorizer test input: when n > 0, runs x[i] = llvm.cttz.i64(y[i], true)
   ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. Despite the
   ; _f64 suffix in the name, the elements are i64 integers. The FileCheck
   ; directives above this function expect llvm.cttz.v4i64 in the output.
   1149 define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
   1150 entry:
   1151   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
   1152   br i1 %cmp9, label %for.body, label %for.end
   1153 
   1154 for.body:                                         ; preds = %entry, %for.body
   1155   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   1156   %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
   1157   %0 = load i64* %arrayidx, align 8
   1158   %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone  ; second operand is the constant i1 true on every iteration
   1159   %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
   1160   store i64 %call, i64* %arrayidx4, align 8
   1161   %indvars.iv.next = add i64 %indvars.iv, 1
   1162   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
   1163   %exitcond = icmp eq i32 %lftr.wideiv, %n
   1164   br i1 %exitcond, label %for.end, label %for.body
   1165 
   1166 for.end:                                          ; preds = %for.body, %entry
   1167   ret void
   1168 }
   1169 
   1170 declare i64  @llvm.ctlz.i64 (i64, i1) nounwind readnone
   1171 
   1172 ;CHECK-LABEL: @ctlz_f64(
   1173 ;CHECK: llvm.ctlz.v4i64
   1174 ;CHECK: ret void
   ; Vectorizer test input: when n > 0, runs x[i] = llvm.ctlz.i64(y[i], true)
   ; for i = 0 .. n-1; when n <= 0 it branches straight to for.end. Despite the
   ; _f64 suffix in the name, the elements are i64 integers. The FileCheck
   ; directives above this function expect llvm.ctlz.v4i64 in the output.
   1175 define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable {
   1176 entry:
   1177   %cmp9 = icmp sgt i32 %n, 0  ; guard: only enter the loop for n > 0
   1178   br i1 %cmp9, label %for.body, label %for.end
   1179 
   1180 for.body:                                         ; preds = %entry, %for.body
   1181   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   1182   %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv
   1183   %0 = load i64* %arrayidx, align 8
   1184   %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone  ; second operand is the constant i1 true on every iteration
   1185   %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv
   1186   store i64 %call, i64* %arrayidx4, align 8
   1187   %indvars.iv.next = add i64 %indvars.iv, 1
   1188   %lftr.wideiv = trunc i64 %indvars.iv.next to i32  ; narrow the i64 counter to compare against i32 %n
   1189   %exitcond = icmp eq i32 %lftr.wideiv, %n
   1190   br i1 %exitcond, label %for.end, label %for.body
   1191 
   1192 for.end:                                          ; preds = %for.body, %entry
   1193   ret void
   1194 }
   1195